From 8420adcb97b09fe69d59c2d248bb76fd994e21ce Mon Sep 17 00:00:00 2001
From: Stephen Becker
Date: Sat, 21 Dec 2013 17:58:29 -0500
Subject: [PATCH] Some major changes to fix a bug and change behavior; most of
 the changes are in tfocs_iterate and tfocs_initialize.

To understand the changes, note that most algorithms use two sequences of
variables, the "x" and the "y" sequence. The return value is the "x" iterate,
but the stopping criteria are often evaluated on the "y" iterate, since that
value is in effect pre-computed. If data recording was requested, it was done
on the "x" variable at an additional cost. The new policy is to use whichever
variable requires less computation (usually the "y" variable), but you can
override this behavior if you wish: set
opts.stopping_criteria_always_use_x = true to always use "x" for the stopping
criteria; set opts.data_collection_always_use_x = true to always use "x" for
data collection; and set opts.output_always_use_x = true to always output "x"
(and, when appropriate, its corresponding dual variable). By default, we use
whichever is cheaper, except for the output, where we use whichever gives the
better objective value. Due to these major changes, many smaller changes were
made to keep the code working.

Also, the continuation script now re-uses the previous stepsize as the new
starting stepsize.

We have also added a new restart feature that uses the gradient rather than
function values, following an idea of Brendan O'Donoghue. This is controlled
by setting opts.autoRestart to either 'gra' or 'fun' for the gradient or
function criterion, respectively. These automatic restart criteria take effect
whenever opts.restart < 0. Also new is opts.printRestart; if true (the
default), output is printed whenever a restart occurs.
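As a rough usage sketch of the new options (the solver call and the problem
objects smoothF, affineF, projectorF, x0 below are placeholders, not part of
this patch):

    % Restart on the gradient criterion; keep the default cheap x/y policy,
    % but force the stopping test to be evaluated on the "x" iterate.
    opts = struct('restart', -Inf, 'autoRestart', 'gra', 'printRestart', true);
    opts.stopping_criteria_always_use_x = true;
    % opts.output_always_use_x = true;  % uncomment to always return the x iterate
    [x, odata, optsOut] = tfocs( smoothF, affineF, projectorF, x0, opts );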
---
 continuation.m                   |   7 +++
 examples/smallscale/test_SLOPE.m |   3 +-
 private/tfocs_cleanup.m          |  68 +++++++++++++++++----
 private/tfocs_initialize.m       |  28 ++++++++-
 private/tfocs_iterate.m          | 102 +++++++++++++++++++++++--------
 tfocs.m                          |   2 +-
 6 files changed, 168 insertions(+), 42 deletions(-)

diff --git a/continuation.m b/continuation.m
index 8e32292..0699b49 100644
--- a/continuation.m
+++ b/continuation.m
@@ -87,6 +87,7 @@
 %     tol = finalTol2*betaTol^(kMax+1); % bug fixed Feb 2011
     tol = finalTol2*betaTol^(kMax);
 
+L0  = Inf; % for first round, use default value of stepsize
 xOld                  = x0;
 odataCumulative       = [];
 odataCumulative.niter= 0;
@@ -120,6 +121,9 @@
     if k == kMax && ~isempty( finalRestart )
         optsTemp.restart = finalRestart;
     end
+    if isfinite(L0)
+        optsTemp.L0 = L0;
+    end
 
     % call the solver
     [x, odata, optsOut ] = fcn( mu, x0, z0, optsTemp );
@@ -146,6 +150,9 @@
     % (possibly) decrease mu for next solve
     mu      = mu*muDecrement;
 
+    % Dec '13, use stepsize estimate
+    L0      = 1/odata.stepsize(end);
+
     % update output data
     fields = { 'f', 'normGrad', 'stepsize','theta','counts','err' };
     for ff = fields
diff --git a/examples/smallscale/test_SLOPE.m b/examples/smallscale/test_SLOPE.m
index 2b12b8d..6cc8692 100644
--- a/examples/smallscale/test_SLOPE.m
+++ b/examples/smallscale/test_SLOPE.m
@@ -82,7 +82,8 @@ min_x sum(lambda*sort(abs(x),'descend')) + .5||A(x)-b||_2^2
 %% Call the TFOCS solver
 er   = er_ref; % error with reference solution (from IPM)
-opts = struct('restart',-Inf,'tol',1e-13,'maxits',1000, 'printEvery',10);
+opts = struct('restart',-Inf,'autoRestart','gradient', ...
+    'tol',1e-13,'maxits',1000, 'printEvery',10);
 opts.errFcn = { @(f,primal) er(primal), ...
                 @(f,primal) f - obj_ref };
 tic;
diff --git a/private/tfocs_cleanup.m b/private/tfocs_cleanup.m
index 3f35689..96eed93 100644
--- a/private/tfocs_cleanup.m
+++ b/private/tfocs_cleanup.m
@@ -2,29 +2,75 @@
 % Performs the final set of operations for our templated solvers: performs
 % final calculations, prints final outputs, cleans up unused data.
 
-if saddle,
-    if isempty(g_Ax),
-        [ f_x, g_Ax ] = apply_smooth(A_x);
+% This should be called just after tfocs_iterate, so re-use some of that
+% computation:
+
+% We have two sequences of points, the x and the y sequence
+% Other things being equal, we will use the x sequence as output
+% But in some cases, if we use y, there is less computation to do
+% since we can re-use some older computation. If it is the case that
+% computing y is cheap, then we will still compute x, but now we will
+% compare x and y and take whichever is better (ties going toward x)
+
+if exist('cur_dual','var')
+    cur_dual = get_dual( cur_dual );
+end
+if v_is_y && ~output_always_use_x
+    % We have some free information about y
+    % cur_pri = y
+    if data_collection_always_use_x
+        % f_vy and dual_y have already been saved
+    else
+        f_vy = f_v;
+        if saddle, dual_y = cur_dual; end
+    end
+end
+if ~v_is_x
+    if saddle,
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual = get_dual( g_Ax );
+    elseif isinf(f_x),
+        f_x = apply_smooth(A_x);
+    end
+    if isinf( C_x ),
+        C_x = apply_projector( x );
+    end
+    f_v     = maxmin * ( f_x + C_x );
+    cur_pri = x;
+end
+% Now, compare x and y if info on y is available, and take whichever is better
+x_or_y_string = 'x';
+if v_is_y && ~output_always_use_x
+    if f_vy < f_v
+        f_v = f_vy;
+        if saddle, cur_dual = dual_y; end
+        x   = y; % losing information after this point!
+        x_or_y_string = 'y';
+    end
+end
+if saddle
+    if ~exist('cur_dual','var')
+        if isempty(g_Ax),
+            [ f_x, g_Ax ] = apply_smooth(A_x);
+        end
+        cur_dual = get_dual( g_Ax );
     end
-    out.dual = get_dual( g_Ax );
-    % Oct 13: make it compatible:
+    out.dual = cur_dual;
     if isa( out.dual, 'tfocs_tuple')
         out.dual = cell( out.dual );
     end
-elseif isinf(f_x),
-    f_x = apply_smooth(A_x);
 end
-if isinf( C_x ),
-    C_x = apply_projector( x );
-end
 if fid && printEvery,
     fprintf( fid, 'Finished: %s\n', status );
 end
 out.niter  = n_iter;
 out.status = status;
+out.x_or_y = x_or_y_string;
 d.niter    = 'Number of iterations';
 if saveHist,
-    out.f(n_iter) = maxmin * ( f_x + C_x );
+    out.f(n_iter) = f_v;
     out.f(n_iter+1:end)        = [];
     out.normGrad(n_iter+1:end) = [];
     out.stepsize(n_iter+1:end) = [];
diff --git a/private/tfocs_initialize.m b/private/tfocs_initialize.m
index 4b15eb9..368df86 100644
--- a/private/tfocs_initialize.m
+++ b/private/tfocs_initialize.m
@@ -28,9 +28,14 @@
     'alg',           'AT' ,...
     'restart',       Inf ,...
     'printStopCrit', false,...
-    'cntr_reset',    50 ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
+    'cntr_reset',    -50 ,... % how often to explicitly recompute A*x and A*y (set to Inf if you want )
     'cg_restart',    Inf ,... % for CG only
     'cg_type',       'pr' ,... % for CG only
+    'stopping_criteria_always_use_x', false, ...
+    'data_collection_always_use_x',   false, ...
+    'output_always_use_x',            false, ...
+    'autoRestart',   'gra', ... % function or gradient
+    'printRestart',  true, ...
     'debug',         false ....
     );
@@ -69,6 +74,7 @@
     desc.errFcn = 'Medium. User-specified error function. See user guide';
     desc.beta   = 'Medium. Backtracking parameter, in (0,1). No line search if >= 1';
     desc.alpha  = 'Medium. Line search increase parameter, in (0,1)';
+    desc.autoRestart= 'Medium. Set to ''gra'' or ''fun'' to choose behavior when restart<0';
     desc.maxCounts  = 'Advanced. Vector that fine-tunes various types of iteration limits; same form as countOps';
@@ -78,9 +84,14 @@
     desc.stopFcn       = 'Advanced. User-supplied stopping criteria. See user guide';
     desc.stopCrit      = 'Advanced. Controls which stopping criteria to use; 1,2, 3 or 4.';
     desc.printStopCrit = 'Advanced. Controls whether to display the value used in the stopping criteria';
-    desc.cntr_reset    = 'Advanced. Controls how often to reset some numerical computatios to avoid roundoff';
+    desc.printRestart  = 'Advanced. Whether to signal when a restart happens';
+    desc.cntr_reset    = 'Advanced. Controls how often to reset some numerical computations to avoid roundoff';
     desc.debug         = 'Advanced. Turns on more useful error messages';
+    desc.stopping_criteria_always_use_x = 'Advanced. Forces usage of x, never y, in stopping crit.';
+    desc.data_collection_always_use_x   = 'Advanced. Forces usage of x, never y, in recording errors.';
+    desc.output_always_use_x            = 'Advanced. Forces output of x, never y. Default: uses whichever is better';
+
     desc.adjoint       = 'Internal.';
     desc.saddle        = 'Internal. Used by TFOCS_SCD';
@@ -186,6 +197,14 @@ if any(odef.maxCounts
+            nLargeEnough    = (n_iter > 2); % Dec '13
+            if restart > 10
+                nLargeEnough = (n_iter - restart_iter > 2);
+            end
+            if d_dual < tol && nLargeEnough
+                % Problems when cur_dual is based on x one iteration
+                %   and y another iteration
                 status = 'Step size tolerance reached';
             end
         end
@@ -133,36 +150,50 @@
 % the solution at the end of the algorithm.
 %
-will_print = fid && printEvery && ( ~isempty( status ) || ~mod( n_iter, printEvery ) );
+will_print = fid && printEvery && ...
+    ( ~isempty( status ) || ~mod( n_iter, printEvery ) || (printRestart && just_restarted) );
 if saveHist || will_print,
-    f_x_save  = f_x;
-    g_Ax_save = g_Ax;
-    if ~isempty(errFcn) && saddle,
-        if isempty(g_Ax),
-            [ f_x, g_Ax ] = smoothF( A_x );
+    % Which point to collect data at? Dec '13, by default, collect data
+    % at the same point used to find f_v unless data_collection_always_use_x
+    % There is also the chance that the f_v wasn't calculated at all
+    if ( data_collection_always_use_x && ~v_is_x ) || ( ~v_is_x && ~v_is_y )
+        f_x_save  = f_x;
+        g_Ax_save = g_Ax;
+        if ~isempty(errFcn) && saddle,
+            if isempty(g_Ax),
+                [ f_x, g_Ax ] = smoothF( A_x );
+            end
+%             out.dual = get_dual( g_Ax );
+            cur_dual = g_Ax ;
         end
-        out.dual = get_dual( g_Ax );
-    end
-    if isinf(f_x),
-        f_x = smoothF( A_x );
-    end
-    if isinf( C_x ),
-        C_x = projectorF( x );
+        if isinf(f_x),
+            f_x = smoothF( A_x );
+        end
+        if isinf( C_x ),
+            C_x = projectorF( x );
+        end
+        f_v     = maxmin * ( f_x + C_x );
+        cur_pri = x;
+        v_is_x  = true;
+        % Now undo any calculations
+        f_x  = f_x_save;
+        g_Ax = g_Ax_save;
     end
-    f_w = maxmin * ( f_x + C_x );
+    % (otherwise, f_v was already calculated)
+
     if ~isempty(errFcn) && iscell(errFcn)
         for err_j = 1 : numel(errFcn),
             if saddle,
-                errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+%                 errs(err_j) = errFcn{err_j}(f_w,x,out.dual);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri,get_dual(cur_dual));
             else
-                errs(err_j) = errFcn{err_j}(f_w,x);
+                errs(err_j) = errFcn{err_j}(f_v,cur_pri);
             end
         end
     end
-    f_x  = f_x_save;
-    g_Ax = g_Ax_save;
 end
+
 % Register a warning if the step size suggests a Lipschitz violation
 if isempty(status) && ( beta < 1 && backtrack_simple && localL > Lexact ),
     warning_lipschitz = true;
@@ -179,7 +210,7 @@
         bchar = '*';
     end
     fprintf( fid, '%-4d| %+12.5e %8.2e %8.2e%c', ...
-        n_iter, f_w, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
+        n_iter, f_v, norm_dx / max( norm_x, 1 ), 1 / L, bchar );
     if countOps,
         fprintf( fid, '|' );
         fprintf( fid, ' %5d', tfocs_count___ );
@@ -223,6 +254,10 @@
         fprintf( fid, ' %8.2e', stopResid );
     end
+    if printRestart && just_restarted
+        fprintf( fid, ' | restarted');
+    end
+
     fprintf( fid, '\n');
 end
@@ -241,7 +276,7 @@
             out.err(end+1:csize,:) = 0;
         end
     end
-    out.f(n_iter) = f_w;
+    out.f(n_iter) = f_v;
     out.theta(n_iter)    = theta;
     out.stepsize(n_iter) = 1 / L;
     out.normGrad(n_iter) = norm_dx;
@@ -265,7 +300,19 @@
 % Fixed it.
 %   two changes: (1) test is now ( maxmin*f_v > maxmin*f_v_old)
 %   (2) to reset f_v_old, set to maxmin*Inf, not just +Inf
-if n_iter - restart_iter == abs(round(restart)) || (restart < 0 && maxmin*f_v > maxmin*f_v_old),
+% Dec 2013, adding gradient-based restarting (see O'Donoghue and Candes '12)
+just_restarted  = false;
+do_auto_restart = false;
+if restart < 0
+    if strfind(lower(autoRestart),'gra')
+        do_auto_restart = tfocs_dot(g_Ay, A_x - A_x_old ) > 0;
+    elseif any(strfind(lower(autoRestart),'fun')) || any(strfind(lower(autoRestart),'obj'))
+        do_auto_restart = maxmin*f_v > maxmin*f_v_old;
+    else
+        error('bad value for opts.autoRestart. Should be ''gradient'' or ''function''');
+    end
+end
+if n_iter - restart_iter == abs(round(restart)) || do_auto_restart
     restart_iter     = n_iter;
     backtrack_simple = true;
     theta            = Inf;
@@ -273,10 +320,13 @@
     z = x; A_z = A_x; f_z = f_x; g_Az = g_Ax; g_z = g_x; C_z = C_x;
     f_v_old = maxmin*Inf; % important!
 %     continue;
+    just_restarted = true;
 elseif restart < 0,
     f_v_old = f_v;
 end
+C_y = Inf;
+
 % TFOCS v1.3 by Stephen Becker, Emmanuel Candes, and Michael Grant.
 % Copyright 2013 California Institute of Technology and CVX Research.
 % See the file LICENSE for full license information.
diff --git a/tfocs.m b/tfocs.m
index b51f4a3..492ca77 100644
--- a/tfocs.m
+++ b/tfocs.m
@@ -82,7 +82,7 @@
         || strcmpi(smoothF,'-v') || strcmpi(smoothF,'-version') )
     % Display version information
     % type version.txt
-    disp('TFOCS v1.3 1.1, December 2011');
+    disp('TFOCS v1.3, October 2013');
     return
 end
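For reference, a minimal sketch of a continuation call that would pick up the
stepsize warm-start added above, assuming the continuation(fcn, mu, x0, z0, opts)
interface and the solver_sBPDN(A, b, epsilon, mu, x0, z0, opts) solver; the
problem data A, b, epsilon, mu, x0, z0 and the option values are placeholders:

    % Each continuation round now starts from L0 = 1/odata.stepsize(end)
    % of the previous round instead of the default stepsize guess.
    fcn  = @(mu, x0, z0, opts) solver_sBPDN( A, b, epsilon, mu, x0, z0, opts );
    opts = struct('restart', -Inf, 'autoRestart', 'gra', 'printRestart', true);
    [x, odata, optsOut] = continuation( fcn, mu, x0, z0, opts );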