5. Regression problems
Exercise 5.1. Consider the data given:
close all;
clear;
clc;
data = [ -5.0000 -96.2607
-4.8000 -85.9893
-4.6000 -55.2451
-4.4000 -55.6153
-4.2000 -44.8827
-4.0000 -24.1306
-3.8000 -19.4970
-3.6000 -10.3972
-3.4000 -2.2633
-3.2000 0.2196
-3.0000 4.5852
-2.8000 7.1974
-2.6000 8.2207
-2.4000 16.0614
-2.2000 16.4224
-2.0000 17.5381
-1.8000 11.4895
-1.6000 14.1065
-1.4000 16.8220
-1.2000 16.1584
-1.0000 11.6846
-0.8000 5.9991
-0.6000 7.8277
-0.4000 2.8236
-0.2000 2.7129
0 1.1669
0.2000 -1.4223
0.4000 -3.8489
0.6000 -4.7101
0.8000 -8.1538
1.0000 -7.3364
1.2000 -13.6464
1.4000 -15.2607
1.6000 -14.8747
1.8000 -9.9271
2.0000 -10.5022
2.2000 -7.7297
2.4000 -11.7859
2.6000 -10.2662
2.8000 -7.1364
3.0000 -2.1166
3.2000 1.9428
3.4000 4.0905
3.6000 16.3151
3.8000 16.9854
4.0000 17.6418
4.2000 46.3117
4.4000 53.2609
4.6000 72.3538
4.8000 49.9166
5.0000 89.1652];
x = data(:,1);
y = data(:,2);
l = length(x);
n = 4; % number of coefficients; polynomial degree = n-1 = 3
% Vandermonde Matrix
A = zeros(l,n);
for i = 1:n
    A(:,i) = x.^(i-1);
end
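The loop can also be written in one vectorized line (our addition, using implicit expansion, available since R2016b; the name A_alt is just for the comparison):
% One-line alternative to the loop above
A_alt = x.^(0:n-1); % columns are 1, x, x.^2, x.^3
% norm(A - A_alt) should be exactly 0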
- a) Find the best approximating polynomial of degree 3 with respect to ∥ · ∥2.
z_2 = (A'*A)\(A'*y);
y_2 = A*z_2;
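As a sanity check (our addition), MATLAB's backslash operator solves the same least-squares problem through a QR factorization, avoiding the squared condition number that comes from forming A'*A:
% Backslash solves min ||A*z - y||_2 directly
z_qr = A\y; % should match z_2 up to rounding error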
- b) Find the best approximating polynomial of degree 3 w.r.t. ∥ · ∥1.
c = [zeros(n,1)
ones(l,1)];
D = [A -eye(l)
-A -eye(l)];
d = [y
-y];
% Solve the linear program
solution_1 = linprog(c,D,d);
z_1 = solution_1(1:n);
y_1 = A*z_1;
- c) Find the best approximating polynomial of degree 3 w.r.t. ∥ · ∥∞.
c = [zeros(n,1)
1];
D = [A -ones(l,1)
-A -ones(l,1)];
d = [y
-y];
solution_inf = linprog(c,D,d);
z_inf = solution_inf(1:n);
y_inf = A*z_inf;
% Plot the data and the three fits
plot(x,y,'r.',x,y_1,'k-', x, y_2, 'b-', x, y_inf, 'g-');
Output: data (red dots) with the l1 fit (black), l2 fit (blue), and l-infinity fit (green).

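To compare the three fits quantitatively (a small check of our own, not part of the exercise), evaluate each residual in the norm that fit minimizes:
% Each fit is optimal with respect to its own norm
fprintf('2-norm residual of the 2-norm fit: %.4f\n', norm(y - y_2, 2));
fprintf('1-norm residual of the 1-norm fit: %.4f\n', norm(y - y_1, 1));
fprintf('inf-norm residual of the inf-norm fit: %.4f\n', norm(y - y_inf, Inf));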
Exercise 5.2. Apply the linear ε-SV regression model with ε = 0.5 to the training data given.
data = [0 2.5584
0.5000 2.6882
1.0000 2.9627
1.5000 3.2608
2.0000 3.6235
2.5000 3.9376
3.0000 4.0383
3.5000 4.1570
4.0000 4.8498
4.5000 4.6561
5.0000 4.5119
5.5000 4.8346
6.0000 5.6039
6.5000 5.5890
7.0000 6.1914
7.5000 5.8966
8.0000 6.3866
8.5000 6.6909
9.0000 6.5224
9.5000 7.1803
10.0000 7.2537];
x = data(:,1);
y = data(:,2);
l = length(x);
epsilon = 0.5;
plot(x,y,'r.');
Output: scatter plot of the training data.

% Quadratic objective: minimize 0.5*w^2
Q = [1 0
0 0];
f = zeros(2,1);
D = [-x -ones(l,1)
x ones(l,1)];
d = epsilon*ones(2*l,1) + [-y;y];
% Solve the Quadratic Problem
options = optimset('Display','off');
solution = quadprog(Q,f,D,d,[],[],[],[],[],options);
% Compute w
w = solution(1);
% Compute b
b = solution(2);
% Plot the epsilon tube
u = w.*x + b; % Center line
v = w.*x + b + epsilon; % Up-line
vv = w.*x + b - epsilon; % Down-line
plot(x,y,'r.',x,u,'k-',x,v,'b-',x,vv,'b-');
Output: data (red dots) with the regression line (black) and the epsilon-tube (blue).

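Since this model has no slack variables, the QP is only feasible if every training point lies inside the epsilon-tube; a quick assertion (our addition, with a small tolerance for solver accuracy) confirms this:
% All residuals must be within epsilon for the hard-tube model
res = y - (w*x + b);
assert(max(abs(res)) <= epsilon + 1e-6);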
Exercise 5.3. Apply the linear SVM with slack variables (set ε = 0.2 and C = 10) to the training data given:
close all;
clear;
clc;
data = [ 0 2.5584
0.5000 2.6882
1.0000 2.9627
1.5000 3.2608
2.0000 3.6235
2.5000 3.9376
3.0000 4.0383
3.5000 4.1570
4.0000 4.8498
4.5000 4.6561
5.0000 4.5119
5.5000 4.8346
6.0000 5.6039
6.5000 5.5890
7.0000 6.1914
7.5000 5.8966
8.0000 6.3866
8.5000 6.6909
9.0000 6.5224
9.5000 7.1803
10.0000 7.2537];
x = data(:, 1);
y = data(:, 2);
l = length(x);
% Plot Data
plot(x,y,'b.');
Output: scatter plot of the training data.

%% linear regression - primal problem with slack variables
C = 10;
epsilon = 0.2;
% define the problem
Q = [ 1 zeros(1,2*l+1)
zeros(2*l+1,1) zeros(2*l+1) ];
c = [ 0 ; 0 ; C*ones(2*l,1)];
D = [-x -ones(l,1) -eye(l) zeros(l)
x ones(l,1) zeros(l) -eye(l)];
d = epsilon*ones(2*l,1) + [-y;y];
options = optimset('Display','off');
% solve the problem
solution = quadprog(Q,c,D,d,[],[],[-inf;-inf;zeros(2*l,1)],[],[],options);
% compute w
w = solution(1);
% compute b
b = solution(2);
% compute slack variables xi+ and xi-
xip = solution(3:2+l);
xim = solution(3+l:2+2*l);
% find regression and epsilon-tube
z = w.*x + b ;
zp = w.*x + b + epsilon ;
zm = w.*x + b - epsilon ;
%% plot the solution
plot(x,y,'b.',x,z,'k-',x,zp,'r-',x,zm,'r-');
legend('Data','regression',...
'\epsilon-tube','Location','NorthWest')
Output: data, regression line (black), and epsilon-tube (red), with legend.

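With slack variables the tube no longer has to contain every point, and the slacks identify exactly which points fall outside it. A short check (our addition):
% Points with a positive slack lie outside the epsilon-tube
outside = find(xip > 1e-6 | xim > 1e-6);
fprintf('%d of %d points lie outside the tube\n', numel(outside), l);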
Exercise 5.4. Solve the dual problem of the linear SVM with slack variables (set ε = 0.2 and C = 10) applied to the training data given. Moreover, find the support vectors:
close all;
clear;
clc;
data = [ 0 2.5584
0.5000 2.6882
1.0000 2.9627
1.5000 3.2608
2.0000 3.6235
2.5000 3.9376
3.0000 4.0383
3.5000 4.1570
4.0000 4.8498
4.5000 4.6561
5.0000 4.5119
5.5000 4.8346
6.0000 5.6039
6.5000 5.5890
7.0000 6.1914
7.5000 5.8966
8.0000 6.3866
8.5000 6.6909
9.0000 6.5224
9.5000 7.1803
10.0000 7.2537];
x = data(:,1);
y = data(:,2);
l = length(x);
% Plot the dataset
plot(x,y,'b.');
Output: scatter plot of the training data.

% Model parameters
epsilon = 0.2;
C = 10;
% Define the problem
X = zeros(l,l);
for i = 1 : l
    for j = 1 : l
        X(i,j) = x(i)*x(j);
    end
end
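% Note (our addition): for the linear kernel this double loop is simply the
% outer product X = x*x'; the loop form is kept because it mirrors the kernel
% version used in Exercises 5.5 and 5.6.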
Q = [ X -X ; -X X ];
c = epsilon*ones(2*l,1) + [-y;y];
% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],[ones(1,l) -ones(1,l)],0,zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);
% compute w
w = (lap-lam)'*x ;
% compute b
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - w*x(i) - epsilon;
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) - w*x(i) + epsilon;
end
% find regression and epsilon-tube
z = w.*x + b ;
zp = w.*x + b + epsilon ;
zm = w.*x + b - epsilon ;
%% plot the solution
% find support vectors
sv = [find(lap > 1e-3);find(lam > 1e-3)];
sv = sort(sv);
plot(x,y,'b.',x(sv),y(sv),...
'go',x,z,'k-',x,zp,'r-',x,zm,'r-');
legend('Data','Support vectors',...
'regression','\epsilon-tube',...
'Location','NorthWest')
Output: data, support vectors (green circles), regression line (black), and epsilon-tube (red).

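Only points with a nonzero multiplier contribute to w, so w can be reconstructed from the support vectors alone; the snippet below (our addition) verifies this up to the 1e-3 threshold used above:
% w depends only on the support vectors
w_sv = (lap(sv) - lam(sv))'*x(sv);
fprintf('w = %.4f, w from SVs only = %.4f, #SV = %d of %d\n', w, w_sv, numel(sv), l);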
Exercise 5.5. Consider the training data given. Find the nonlinear regression function given by the nonlinear SVM using a polynomial kernel with degree p = 3 and parameters ε = 10, C = 10. Moreover, find the support vectors:
close all;
clear;
clc;
data = [-5.0000 -96.2607
-4.8000 -85.9893
-4.6000 -55.2451
-4.4000 -55.6153
-4.2000 -44.8827
-4.0000 -24.1306
-3.8000 -19.4970
-3.6000 -10.3972
-3.4000 -2.2633
-3.2000 0.2196
-3.0000 4.5852
-2.8000 7.1974
-2.6000 8.2207
-2.4000 16.0614
-2.2000 16.4224
-2.0000 17.5381
-1.8000 11.4895
-1.6000 14.1065
-1.4000 16.8220
-1.2000 16.1584
-1.0000 11.6846
-0.8000 5.9991
-0.6000 7.8277
-0.4000 2.8236
-0.2000 2.7129
0 1.1669
0.2000 -1.4223
0.4000 -3.8489
0.6000 -4.7101
0.8000 -8.1538
1.0000 -7.3364
1.2000 -13.6464
1.4000 -15.2607
1.6000 -14.8747
1.8000 -9.9271
2.0000 -10.5022
2.2000 -7.7297
2.4000 -11.7859
2.6000 -10.2662
2.8000 -7.1364
3.0000 -2.1166
3.2000 1.9428
3.4000 4.0905
3.6000 16.3151
3.8000 16.9854
4.0000 17.6418
4.2000 46.3117
4.4000 53.2609
4.6000 72.3538
4.8000 49.9166
5.0000 89.1652];
x = data(:,1) ;
y = data(:,2) ;
l = length(x) ; % number of points
plot(x,y, 'b.');
Output: scatter plot of the training data.

%% nonlinear regression - dual problem
epsilon = 10 ;
C = 10;
% define the problem
X = zeros(l,l);
for i = 1 : l
    for j = 1 : l
        X(i,j) = kernel(x(i),x(j));
    end
end
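% Note (our addition): with the polynomial kernel defined at the bottom of
% this script, this double loop is equivalent to the vectorized expression
% (x*x' + 1).^p.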
Q = [ X -X ; -X X ];
c = epsilon*ones(2*l,1) + [-y;y];
% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],...
[ones(1,l) -ones(1,l)],0,...
zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);
% compute b
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - epsilon;
    for j = 1 : l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) + epsilon;
    for j = 1 : l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
% find regression and epsilon-tube
z = zeros(l,1);
for i = 1 : l
    z(i) = b;
    for j = 1 : l
        z(i) = z(i) + (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
zp = z + epsilon ;
zm = z - epsilon ;
%% plot the solution
% find support vectors
sv = [find(lap > 1e-3);find(lam > 1e-3)];
sv = sort(sv);
plot(x,y,'b.',x(sv),y(sv),...
'ro',x,z,'k-',x,zp,'r-',x,zm,'r-');
legend('Data','Support vectors',...
'regression','\epsilon-tube',...
'Location','NorthWest')
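The regression curve above is only sampled at the training points. For a smoother plot, the kernel expansion can be evaluated on a finer grid (a sketch of our own, placed before the local function so the script stays valid; xg and zg are names we introduce):
% Evaluate f(t) = sum_j (lap_j - lam_j)*k(t, x_j) + b on a fine grid
xg = linspace(-5, 5, 200)';
zg = b*ones(size(xg));
for i = 1:length(xg)
    for j = 1:l
        zg(i) = zg(i) + (lap(j)-lam(j))*kernel(xg(i),x(j));
    end
end
figure;
plot(x, y, 'b.', xg, zg, 'k-');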
function v = kernel(x,y)
%% kernel function
p = 3; % polynomial degree, as specified in the exercise
v = (x'*y + 1)^p;
end
Output: data, support vectors (red circles), nonlinear regression curve (black), and epsilon-tube (red).

Exercise 5.6. Consider the training data given. Find the nonlinear regression function given by the nonlinear SVM using a Gaussian kernel with γ = 1 and parameters ε = 0.3, C = 10. Moreover, find the support vectors.
close all;
clear;
clc;
data = [ 0 0.0620
0.1000 0.2247
0.2000 0.4340
0.3000 0.6490
0.4000 0.7370
0.5000 0.8719
0.6000 0.9804
0.7000 1.0192
0.8000 1.0794
0.9000 1.0726
1.0000 0.9252
1.1000 0.8322
1.2000 0.7457
1.3000 0.5530
1.4000 0.4324
1.5000 0.2384
1.6000 0.0060
1.7000 -0.1695
1.8000 -0.4023
1.9000 -0.5487
2.0000 -0.6583
2.1000 -0.8156
2.2000 -0.8582
2.3000 -0.9217
2.4000 -0.9478
2.5000 -0.8950
2.6000 -0.7947
2.7000 -0.7529
2.8000 -0.5917
2.9000 -0.3654
3.0000 -0.2392
3.1000 -0.0172
3.2000 0.2067
3.3000 0.4111
3.4000 0.5594
3.5000 0.6678
3.6000 0.7973
3.7000 0.9605
3.8000 1.0246
3.9000 1.0947
4.0000 1.0640
4.1000 1.0070
4.2000 0.9069
4.3000 0.7604
4.4000 0.6811
4.5000 0.4661
4.6000 0.2259
4.7000 0.0944
4.8000 -0.1224
4.9000 -0.3606
5.0000 -0.4550
5.1000 -0.6669
5.2000 -0.8049
5.3000 -0.9114
5.4000 -0.9498
5.5000 -0.9771
5.6000 -0.9140
5.7000 -0.9127
5.8000 -0.7953
5.9000 -0.6653
6.0000 -0.4486
6.1000 -0.3138
6.2000 -0.0900
6.3000 0.0940
6.4000 0.3098
6.5000 0.4316
6.6000 0.6899
6.7000 0.8252
6.8000 0.8642
6.9000 0.9903
7.0000 1.0232
7.1000 1.0610
7.2000 0.9887
7.3000 0.9528
7.4000 0.8486
7.5000 0.7103
7.6000 0.5312
7.7000 0.3067
7.8000 0.1591
7.9000 -0.0511
8.0000 -0.2771
8.1000 -0.4264
8.2000 -0.5930
8.3000 -0.7232
8.4000 -0.8070
8.5000 -0.8913
8.6000 -0.9097
8.7000 -0.9874
8.8000 -0.9269
8.9000 -0.8212
9.0000 -0.6551
9.1000 -0.5258
9.2000 -0.3894
9.3000 -0.2136
9.4000 -0.0436
9.5000 0.2240
9.6000 0.3940
9.7000 0.5431
9.8000 0.7247
9.9000 0.8305
10.0000 0.9881];
x = data(:,1);
y = data(:,2);
l = length(x); % number of points
plot(x,y,'b.');
Output: scatter plot of the training data.

%% nonlinear regression - dual problem
epsilon = 0.3; % as specified in the exercise
C = 10;
% define the problem
X = zeros(l,l);
for i = 1:l
    for j = 1:l
        X(i,j) = kernel(x(i),x(j));
    end
end
Q = [ X -X ; -X X ];
c = epsilon*ones(2*l,1) + [-y;y];
% solve the problem
options = optimset('Display', 'off');
solution = quadprog(Q,c,[],[],[ones(1,l) -ones(1,l)],0,zeros(2*l,1),C*ones(2*l,1), [], options);
lap = solution(1:l);
lam = solution(l+1:2*l);
% compute b
ind = find(lap > 1e-3 & lap < C-1e-3);
if ~isempty(ind)
    i = ind(1);
    b = y(i) - epsilon;
    for j = 1:l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
else
    ind = find(lam > 1e-3 & lam < C-1e-3);
    i = ind(1);
    b = y(i) + epsilon;
    for j = 1:l
        b = b - (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
% find regression and epsilon-tube
z = zeros(l,1);
for i = 1:l
    z(i) = b;
    for j = 1:l
        z(i) = z(i) + (lap(j)-lam(j))*kernel(x(i),x(j));
    end
end
zp = z + epsilon ;
zm = z - epsilon ;
% find support vectors
sv = [find(lap > 1e-3)
find(lam > 1e-3)];
sv = sort(sv);
%% plot the solution
plot(x,y,'b.',x(sv),y(sv),...
'ro',x,z,'k-',x,zp,'r-',x,zm,'r-');
legend('Data','Support vectors',...
'regression','\epsilon-tube',...
'Location','NorthEast')
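As an optional cross-check (our addition; implicit expansion requires R2016b or later), the Gaussian Gram matrix for scalar inputs can be built without the double loop:
% Vectorized Gram matrix, with gamma = 1 as in the kernel function below
Xv = exp(-(x - x').^2);
% norm(Xv - X) should be close to 0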
function v = kernel(x,y)
%% kernel function
gamma = 1;
v = exp(-gamma*norm(x-y)^2);
end
Output: data, support vectors (red circles), nonlinear regression curve (black), and epsilon-tube (red).
