5. Regression problems

Exercises Chapter 5

5.1 Polynomial Regression

Exercise 5.1. Consider the data given:

close all;
clear;
clc;

data = [   -5.0000  -96.2607 
   -4.8000  -85.9893
   -4.6000  -55.2451
   -4.4000  -55.6153
   -4.2000  -44.8827
   -4.0000  -24.1306
   -3.8000  -19.4970
   -3.6000  -10.3972
   -3.4000   -2.2633
   -3.2000    0.2196
   -3.0000    4.5852
   -2.8000    7.1974
   -2.6000    8.2207
   -2.4000   16.0614
   -2.2000   16.4224
   -2.0000   17.5381
   -1.8000   11.4895
   -1.6000   14.1065
   -1.4000   16.8220
   -1.2000   16.1584
   -1.0000   11.6846
   -0.8000    5.9991
   -0.6000    7.8277
   -0.4000    2.8236
   -0.2000    2.7129
         0    1.1669
    0.2000   -1.4223
    0.4000   -3.8489
    0.6000   -4.7101
    0.8000   -8.1538
    1.0000   -7.3364
    1.2000  -13.6464
    1.4000  -15.2607
    1.6000  -14.8747
    1.8000   -9.9271
    2.0000  -10.5022
    2.2000   -7.7297
    2.4000  -11.7859
    2.6000  -10.2662
    2.8000   -7.1364
    3.0000   -2.1166
    3.2000    1.9428
    3.4000    4.0905
    3.6000   16.3151
    3.8000   16.9854
    4.0000   17.6418
    4.2000   46.3117
    4.4000   53.2609
    4.6000   72.3538
    4.8000   49.9166
    5.0000   89.1652];

x = data(:,1);
y = data(:,2);
l = length(x);
n = 4; % number of coefficients: polynomial degree is n-1 = 3

% Vandermonde matrix: column i contains x.^(i-1)
A = zeros(l,n);

for i = 1:n
    A(:,i) = x.^(i-1);
end
  • a) Find the best approximating polynomial of degree 3 with respect to ∥ · ∥2.
z_2 = (A'*A)\(A'*y);
y_2 = A*z_2;
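% Optional sanity check: the least-squares solution must agree with polyfit
% (polyfit returns coefficients in descending-power order, hence the flip;
% p2 is an illustrative variable name)
p2 = flipud(polyfit(x,y,3)');
disp(norm(z_2 - p2)) % should be numerically zero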
  • b) Find the best approximating polynomial of degree 3 w.r.t. ∥ · ∥1.
c = [zeros(n,1)
    ones(l,1)];

D = [A -eye(l)
    -A -eye(l)];

d = [y
    -y];

% Solve the linear program
solution_1 = linprog(c,D,d);
z_1 = solution_1(1:n);
y_1 = A*z_1;
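This linear program encodes min_z ∥Az − y∥1 by introducing one auxiliary variable t_i per residual: minimize Σ_i t_i subject to Az − t ≤ y and −Az − t ≤ −y, i.e. |(Az − y)_i| ≤ t_i for all i. The vectors c, D, d above are exactly this formulation with the unknowns ordered as (z, t).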
  • c) Find the best approximating polynomial of degree 3 w.r.t. ∥ · ∥∞.
c = [zeros(n,1)
    1];

D = [A -ones(l,1)
    -A -ones(l,1)];

d = [y
    -y];

solution_inf = linprog(c,D,d);
z_inf = solution_inf(1:n);
y_inf = A*z_inf;
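For the ∞-norm a single auxiliary variable suffices, since one bound t must dominate every residual: minimize t subject to Az − t·1 ≤ y and −Az − t·1 ≤ −y, which is what c, D, d encode here.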
% Plot the data and the three fits (1-, 2-, and inf-norm)
plot(x,y,'r.',x,y_1,'k-', x, y_2, 'b-', x, y_inf, 'g-');

Output

5.2 Linear Epsilon-Support Vector Regression

Exercise 5.2. Apply the linear ε-SV regression model with ε = 0.5 to the training data given.
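The (hard) linear ε-SV regression model seeks the flattest affine function w·x + b that deviates from every target by at most ε: minimize (1/2)w² subject to |w x_i + b − y_i| ≤ ε for all i. Below this is cast as a quadratic program in the two variables (w, b).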

data = [0    2.5584
    0.5000    2.6882
    1.0000    2.9627
    1.5000    3.2608
    2.0000    3.6235
    2.5000    3.9376
    3.0000    4.0383
    3.5000    4.1570
    4.0000    4.8498
    4.5000    4.6561
    5.0000    4.5119
    5.5000    4.8346
    6.0000    5.6039
    6.5000    5.5890
    7.0000    6.1914
    7.5000    5.8966
    8.0000    6.3866
    8.5000    6.6909
    9.0000    6.5224
    9.5000    7.1803
   10.0000    7.2537];

x = data(:,1);
y = data(:,2);
l = length(x);
epsilon = 0.5;

plot(x,y,'r.');

Output

% Quadratic objective: minimize (1/2)*w^2 over the variables (w, b)
Q = [1 0 
     0 0];

f = zeros(2,1);

D = [-x -ones(l,1)
      x  ones(l,1)];

d = epsilon*ones(2*l,1) + [-y;y];

% Solve the Quadratic Problem
options = optimset('LargeScale', 'off', 'Display','off');
solution = quadprog(Q,f,D,d,[],[],[],[],[],options);

% Compute w
w = solution(1);

% Compute b
b = solution(2);

% Compute the epsilon tube
u = w.*x + b; % regression line
v = w.*x + b + epsilon; % upper tube boundary
vv = w.*x + b - epsilon; % lower tube boundary

plot(x,y,'r.',x,u,'k-',x,v,'b-',x,vv,'b-');

Output

5.3 Linear SVM with slack variables

Exercise 5.3. Apply the linear SVM with slack variables (set ε = 0.2 and C = 10) to the training data given:
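With slack variables the tube constraints become soft: minimize (1/2)w² + C Σ_i (ξ_i⁺ + ξ_i⁻) subject to y_i − (w x_i + b) ≤ ε + ξ_i⁺, (w x_i + b) − y_i ≤ ε + ξ_i⁻ and ξ_i⁺, ξ_i⁻ ≥ 0, so C controls the trade-off between flatness and tube violations. The quadratic program below uses the variable ordering (w, b, ξ⁺, ξ⁻).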

close all;
clear;
clc;

data = [ 0    2.5584
        0.5000    2.6882
        1.0000    2.9627
        1.5000    3.2608
        2.0000    3.6235
        2.5000    3.9376
        3.0000    4.0383
        3.5000    4.1570
        4.0000    4.8498
        4.5000    4.6561
        5.0000    4.5119
        5.5000    4.8346
        6.0000    5.6039
        6.5000    5.5890
        7.0000    6.1914
        7.5000    5.8966
        8.0000    6.3866
        8.5000    6.6909
        9.0000    6.5224
        9.5000    7.1803
       10.0000    7.2537];

x = data(:, 1);
y = data(:, 2);
l = length(x);

% Plot Data
plot(x,y,".");

Output

%% linear regression - primal problem with slack variables
C = 10;
epsilon = 0.2;

% define the problem (variable ordering: w, b, xi+, xi-)
Q = [ 1              zeros(1,2*l+1)
      zeros(2*l+1,1) zeros(2*l+1) ];
  
c = [ 0 ; 0 ; C*ones(2*l,1)]; 

D = [-x -ones(l,1) -eye(l)   zeros(l)
      x  ones(l,1)  zeros(l) -eye(l)];
  
d = epsilon*ones(2*l,1) + [-y;y];

options = optimset('Display','off');
% solve the problem
solution = quadprog(Q,c,D,d,[],[],[-inf;-inf;zeros(2*l,1)],[],[],options);

% compute w
w = solution(1);

% compute b
b = solution(2);

% compute slack variables xi+ and xi-
xip = solution(3:2+l);
xim = solution(3+l:2+2*l);
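% Optional check (1e-6 is an assumed numerical tolerance): points with
% nonzero slack are exactly those lying outside the epsilon-tube
outside = find(xip > 1e-6 | xim > 1e-6);
fprintf('%d of %d points lie outside the tube\n', numel(outside), l);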

% find regression and epsilon-tube
z = w.*x + b ;
zp = w.*x + b + epsilon ;
zm = w.*x + b - epsilon ;

%% plot the solution

plot(x,y,'b.',x,z,'k-',x,zp,'r-',x,zm,'r-');
legend('Data','regression',...
    '\epsilon-tube','Location','NorthWest')

Output

5.4 Linear SVM with slack variables - Dual Problem

Exercise 5.4. Solve the dual problem of the linear SVM with slack variables (set ε = 0.2 and C = 10) applied to the training data given. Moreover, find the support vectors:
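In the multipliers (λ⁺, λ⁻) the dual reads: minimize (1/2)(λ⁺ − λ⁻)ᵀX(λ⁺ − λ⁻) + ε Σ_i (λ_i⁺ + λ_i⁻) − Σ_i y_i (λ_i⁺ − λ_i⁻) subject to Σ_i (λ_i⁺ − λ_i⁻) = 0 and 0 ≤ λ_i⁺, λ_i⁻ ≤ C, where X_ij = x_i x_j is the Gram matrix. The weight is recovered as w = Σ_i (λ_i⁺ − λ_i⁻) x_i, and points with a nonzero multiplier are the support vectors.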

close all;
clear;
clc;

data = [ 0    2.5584
    0.5000    2.6882
    1.0000    2.9627
    1.5000    3.2608
    2.0000    3.6235
    2.5000    3.9376
    3.0000    4.0383
    3.5000    4.1570
    4.0000    4.8498
    4.5000    4.6561
    5.0000    4.5119
    5.5000    4.8346
    6.0000    5.6039
    6.5000    5.5890
    7.0000    6.1914
    7.5000    5.8966
    8.0000    6.3866
    8.5000    6.6909
    9.0000    6.5224
    9.5000    7.1803
   10.0000    7.2537];

x = data(:,1);
y = data(:,2);
l = length(x);

% Plot the dataset
plot(x,y,'b.');

Output

% Model parameters
epsilon = 0.2;
C = 10;

% Define the problem: X is the Gram matrix X(i,j) = x(i)*x(j)
X = zeros(l,l);
for i = 1 : l
    for j = 1 : l
        X(i,j) = x(i)*x(j);
    end
end
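% Note: for this one-dimensional linear kernel the double loop is just the
% outer product; the following equivalent one-liner rebuilds the same matrix
X = x*x';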
Q = [ X -X ; -X X ]; 
c = epsilon*ones(2*l,1) + [-y;y]; 

% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],[ones(1,l) -ones(1,l)],0,zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);

% compute w
w = (lap-lam)'*x ;

% compute b from a free multiplier (0 < lambda < C): such a point lies exactly on the tube boundary
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - w*x(i) - epsilon ;
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) - w*x(i) + epsilon ;
end

% find regression and epsilon-tube
z = w.*x + b ;
zp = w.*x + b + epsilon ;
zm = w.*x + b - epsilon ;

%% plot the solution

% find support vectors
sv = [find(lap > 1e-3);find(lam > 1e-3)]; 
sv = sort(sv);

plot(x,y,'b.',x(sv),y(sv),...
    'go',x,z,'k-',x,zp,'r-',x,zm,'r-');

legend('Data','Support vectors',...
    'regression','\epsilon-tube',...
    'Location','NorthWest')

Output

5.5 Nonlinear Support Vector Machine. Polynomial Kernel

Exercise 5.5. Consider the training data given. Find the nonlinear regression function given by the nonlinear SVM using a polynomial kernel with degree p = 3 and parameters ε = 10, C = 10. Moreover, find the support vectors:
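Here the kernel trick is used: inner products are replaced by k(u, v) = (u·v + 1)^p, so the regression function takes the form f(x) = Σ_j (λ_j⁺ − λ_j⁻) k(x, x_j) + b without ever forming the polynomial feature map explicitly. The dual problem is the same as in Exercise 5.4, with the Gram matrix built from k.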

close all;
clear;
clc; 

data = [-5.0000  -96.2607 
   -4.8000  -85.9893
   -4.6000  -55.2451
   -4.4000  -55.6153
   -4.2000  -44.8827
   -4.0000  -24.1306
   -3.8000  -19.4970
   -3.6000  -10.3972
   -3.4000   -2.2633
   -3.2000    0.2196
   -3.0000    4.5852
   -2.8000    7.1974
   -2.6000    8.2207
   -2.4000   16.0614
   -2.2000   16.4224
   -2.0000   17.5381
   -1.8000   11.4895
   -1.6000   14.1065
   -1.4000   16.8220
   -1.2000   16.1584
   -1.0000   11.6846
   -0.8000    5.9991
   -0.6000    7.8277
   -0.4000    2.8236
   -0.2000    2.7129
         0    1.1669
    0.2000   -1.4223
    0.4000   -3.8489
    0.6000   -4.7101
    0.8000   -8.1538
    1.0000   -7.3364
    1.2000  -13.6464
    1.4000  -15.2607
    1.6000  -14.8747
    1.8000   -9.9271
    2.0000  -10.5022
    2.2000   -7.7297
    2.4000  -11.7859
    2.6000  -10.2662
    2.8000   -7.1364
    3.0000   -2.1166
    3.2000    1.9428
    3.4000    4.0905
    3.6000   16.3151
    3.8000   16.9854
    4.0000   17.6418
    4.2000   46.3117
    4.4000   53.2609
    4.6000   72.3538
    4.8000   49.9166
    5.0000   89.1652];

x = data(:,1) ;
y = data(:,2) ;
l = length(x) ; % number of points

plot(x,y, 'b.');

Output

%% nonlinear regression - dual problem

epsilon = 10 ;
C = 10;

% define the problem: X is the kernel (Gram) matrix
X = zeros(l,l);
for i = 1 : l
    for j = 1 : l
        X(i,j) = kernel(x(i),x(j)) ;
    end
end
Q = [ X -X ; -X X ];   
c = epsilon*ones(2*l,1) + [-y;y]; 

% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],...
    [ones(1,l) -ones(1,l)],0,...
    zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);

% compute b from a free multiplier (0 < lambda < C)
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - epsilon;
    for j = 1 : l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) + epsilon ;
    for j = 1 : l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end

% find regression and epsilon-tube
z = zeros(l,1);
for i = 1 : l
    z(i) = b ;
    for j = 1 : l
        z(i) = z(i) + (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
zp = z + epsilon ;
zm = z - epsilon ;
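% Optional sketch: above, the fit is evaluated only at the training abscissae;
% for a smoother curve one could evaluate it on a denser (hypothetical) grid xx,
% reusing the kernel function defined at the end of the script
xx = linspace(min(x), max(x), 200)';
zz = b*ones(size(xx));
for i = 1:length(xx)
    for j = 1:l
        zz(i) = zz(i) + (lap(j)-lam(j))*kernel(xx(i),x(j));
    end
end
% plot(xx, zz, 'k--') % would overlay the dense-grid regression curve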

%% plot the solution

% find support vectors
sv = [find(lap > 1e-3);find(lam > 1e-3)]; 
sv = sort(sv);

plot(x,y,'b.',x(sv),y(sv),...
    'ro',x,z,'k-',x,zp,'r-',x,zm,'r-');

legend('Data','Support vectors',...
    'regression','\epsilon-tube',...
    'Location','NorthWest')


function v = kernel(x,y)
    %% polynomial kernel of degree p = 3 (as stated in the exercise)
    p = 3 ;
    v = (x'*y + 1)^p;
end

Output

5.6 Nonlinear Support Vector Machine. Gaussian Kernel

Exercise 5.6. Consider the training data given. Find the nonlinear regression function given by the nonlinear SVM using a Gaussian kernel with γ = 1 and parameters ε = 0.3, C = 10. Moreover, find the support vectors.
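The Gaussian (RBF) kernel k(u, v) = exp(−γ∥u − v∥²) turns the regression function into a weighted sum of radial bumps centred at the support vectors, which suits this oscillating data. Apart from the kernel function and the parameter values, the code is identical to Exercise 5.5.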

close all;
clear;
clc;

data = [ 0    0.0620
    0.1000    0.2247
    0.2000    0.4340
    0.3000    0.6490
    0.4000    0.7370
    0.5000    0.8719
    0.6000    0.9804
    0.7000    1.0192
    0.8000    1.0794
    0.9000    1.0726
    1.0000    0.9252
    1.1000    0.8322
    1.2000    0.7457
    1.3000    0.5530
    1.4000    0.4324
    1.5000    0.2384
    1.6000    0.0060
    1.7000   -0.1695
    1.8000   -0.4023
    1.9000   -0.5487
    2.0000   -0.6583
    2.1000   -0.8156
    2.2000   -0.8582
    2.3000   -0.9217
    2.4000   -0.9478
    2.5000   -0.8950
    2.6000   -0.7947
    2.7000   -0.7529
    2.8000   -0.5917
    2.9000   -0.3654
    3.0000   -0.2392
    3.1000   -0.0172
    3.2000    0.2067
    3.3000    0.4111
    3.4000    0.5594
    3.5000    0.6678
    3.6000    0.7973
    3.7000    0.9605
    3.8000    1.0246
    3.9000    1.0947
    4.0000    1.0640
    4.1000    1.0070
    4.2000    0.9069
    4.3000    0.7604
    4.4000    0.6811
    4.5000    0.4661
    4.6000    0.2259
    4.7000    0.0944
    4.8000   -0.1224
    4.9000   -0.3606
    5.0000   -0.4550
    5.1000   -0.6669
    5.2000   -0.8049
    5.3000   -0.9114
    5.4000   -0.9498
    5.5000   -0.9771
    5.6000   -0.9140
    5.7000   -0.9127
    5.8000   -0.7953
    5.9000   -0.6653
    6.0000   -0.4486
    6.1000   -0.3138
    6.2000   -0.0900
    6.3000    0.0940
    6.4000    0.3098
    6.5000    0.4316
    6.6000    0.6899
    6.7000    0.8252
    6.8000    0.8642
    6.9000    0.9903
    7.0000    1.0232
    7.1000    1.0610
    7.2000    0.9887
    7.3000    0.9528
    7.4000    0.8486
    7.5000    0.7103
    7.6000    0.5312
    7.7000    0.3067
    7.8000    0.1591
    7.9000   -0.0511
    8.0000   -0.2771
    8.1000   -0.4264
    8.2000   -0.5930
    8.3000   -0.7232
    8.4000   -0.8070
    8.5000   -0.8913
    8.6000   -0.9097
    8.7000   -0.9874
    8.8000   -0.9269
    8.9000   -0.8212
    9.0000   -0.6551
    9.1000   -0.5258
    9.2000   -0.3894
    9.3000   -0.2136
    9.4000   -0.0436
    9.5000    0.2240
    9.6000    0.3940
    9.7000    0.5431
    9.8000    0.7247
    9.9000    0.8305
   10.0000    0.9881];

x = data(:,1);
y = data(:,2);
l = length(x); % number of points

plot(x,y,'b.');

Output

%% nonlinear regression - dual problem

epsilon = 0.3; % as stated in the exercise
C = 10;

% define the problem
X = zeros(l,l);
for i = 1:l
    for j = 1:l
        X(i,j) = kernel(x(i),x(j)) ;
    end
end
Q = [ X -X ; -X X ];
c = epsilon*ones(2*l,1) + [-y;y]; 

% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],[ones(1,l) -ones(1,l)],0,zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);

% compute b from a free multiplier (0 < lambda < C)
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - epsilon;
    for j = 1:l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) + epsilon ;
    for j = 1:l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end

% find regression and epsilon-tube
z = zeros(l,1);
for i = 1:l
    z(i) = b ;
    for j = 1:l
        z(i) = z(i) + (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
zp = z + epsilon ;
zm = z - epsilon ;


% find support vectors
sv = [find(lap > 1e-3)
      find(lam > 1e-3)]; 
sv = sort(sv);

%% plot the solution
plot(x,y,'b.',x(sv),y(sv),...
    'ro',x,z,'k-',x,zp,'r-',x,zm,'r-');

legend('Data','Support vectors',...
    'regression','\epsilon-tube',...
    'Location','NorthEast')

function v = kernel(x,y)
    %% kernel function
    gamma = 1 ;
    v = exp(-gamma*norm(x-y)^2);
end

Output