- 
                Notifications
    You must be signed in to change notification settings 
- Fork 1
6. Clustering problems
        Marsha Gómez edited this page May 25, 2023 
        ·
        4 revisions
      
    Exercise 6.1. Consider the k-means algorithm, with k = 3, for the set of patterns given:
- a) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 3).
- b) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 4).
- c) Is it possible to improve the solutions obtained in a) and b)?
close all; 
clear; 
clc;
matlab.lang.OnOffSwitchState = 1;
global data l;
data = [1.2734    6.2721
        2.7453    7.4345
        1.6954    8.6408
        1.1044    8.6364
        4.8187    7.3664
        2.7224    6.3303
        4.8462    8.4123
        4.0497    6.7696
        1.0294    8.6174
        3.7202    5.1327
        3.8238    7.1297
        3.5805    7.8660
        3.2092    5.7172
        1.8724    6.3461
        4.0895    5.7509
        1.9121    6.2877
        2.4835    6.6154
        4.5637    7.1943
        4.4255    5.1950
        2.6097    7.2109
        6.0992    8.0496
        5.9660    7.3042
        5.9726    7.9907
        5.6166    7.5821
        8.8257    5.4929
        8.7426    7.0176
        8.2749    6.3890
        7.9130    5.3686
        5.7032    5.5914
        6.4415    5.7927
        5.7552    7.6891
        5.0048    6.7260
        6.2657    7.7776
        7.7985    6.0271
        7.5010    5.0390
        7.1722    7.1291
        6.7561    6.1176
        6.1497    8.7849
        7.0066    8.6258
        8.0462    6.5707
        3.0994    1.7722
        5.6857    2.3666
        6.3487    4.7316
        6.8860    2.5627
        3.2277    2.0929
        4.8013    1.6078
        5.3299    2.5884
        5.7466    2.4989
        5.8777    1.5245
        5.6002    2.7402
        5.9077    1.3661
        4.4954    3.4585
        5.3263    1.0439
        3.4645    3.2930
        3.2306    4.1589
        6.9191    1.9415
        4.1393    2.7921
        5.3799    3.2774
        6.8486    1.2456
        3.7431    2.9852];
% number of patterns
l = size(data, 1); 
k = 3;
% initialize centroids
x = [5 7
     6 3 
     4 3];  % part a)
fprintf("Exercise A. Centroids \n");
for i = 1:k
    fprintf("%.2f %.2f \n", x(i, 1), x(i, 2));
end
exercise_kmeans(x, k);
x = [5 7
     6 3
     4 4];  % part b)
Output
 
fprintf("Exercise B. Centroids \n");
for i = 1:k
    fprintf("%.2f %.2f \n", x(i, 1), x(i, 2));
end
exercise_kmeans(x, k);
Output
 
function exercise_kmeans(centroid, k)
    global data l;
    % plot patterns
    plot(data(:,1),data(:,2),'ko');
    axis([0 10 0 10])
    title('k-means algorithm');
    hold on
    
    x = centroid;
    % plot centroids
    plot(x(1,1),x(1,2),'b^',...
        x(2,1),x(2,2),'r^',...
        x(3,1),x(3,2),'g^');
    
    pause
    
    % initialize clusters
    cluster = zeros(l,1);
    for i = 1 : l
        d = inf;
        for j = 1 : k
            if norm(data(i,:)-x(j,:)) < d
                d = norm(data(i,:)-x(j,:));
                cluster(i) = j;
            end
        end
    end
    
    % plot cluster
    c1 = data(cluster==1,:);
    c2 = data(cluster==2,:);
    c3 = data(cluster==3,:);
    plot(c1(:,1),c1(:,2),'bo',c2(:,1),c2(:,2),'ro',...
        c3(:,1),c3(:,2),'go');
    
    % compute the objective function value
    v = 0;
    for i = 1 : l
        v = v + norm(data(i,:)-x(cluster(i),:))^2 ;
    end
    title(['k-means algoritm: objective function = ',num2str(v)]);
    
    pause
    
    while true
        
        % delete old centroids
        plot(x(1,1),x(1,2),'w^',...
            x(2,1),x(2,2),'w^',...
            x(3,1),x(3,2),'w^');
        
        % update centroids
        for j = 1 : k
            ind = find(cluster == j);
            if ~isempty(ind)
                x(j,:) = mean(data(ind,:));
            end
        end
        
        % plot new centroids
        plot(x(1,1),x(1,2),'b^',...
            x(2,1),x(2,2),'r^',...
            x(3,1),x(3,2),'g^');
        
        pause
        
        % update clusters
        for i = 1 : l
            d = inf;
            for j = 1 : k
                if norm(data(i,:)-x(j,:)) < d
                    d = norm(data(i,:)-x(j,:));
                    cluster(i) = j;
                end
            end
        end
        
        % plot cluster
        c1 = data(cluster==1,:);
        c2 = data(cluster==2,:);
        c3 = data(cluster==3,:);
        plot(c1(:,1),c1(:,2),'bo',c2(:,1),c2(:,2),...
            'ro',c3(:,1),c3(:,2),'go');
        
        
        % update objective function
        vnew = 0;
        for i = 1 : l
            vnew = vnew + norm(data(i,:)-x(cluster(i),:))^2 ;
        end
        title(['k-means algoritm: objective function = ',num2str(vnew)]);
    
        pause
        
        % stopping criterion
        if v - vnew < 1e-5
            break
        else
            v = vnew;
        end
        
    end
    
    v
    hold off
endvbest = inf;
xbest = [];
clusterbest = [];
maxiter = 100;
iter = 0;
while iter < maxiter
    InitialCentroids = 10*rand(k,2);
    [best,bestcluster,v, D] = kmeans(data,k,'Start',InitialCentroids);
    if v < vbest
        xbest = best;
        clusterbest = bestcluster;
        vbest = v
    end
    iter = iter + 1;
end
fprintf("Exercise C. Centroids \n");
for i = 1:k
    fprintf("%.2f %.2f \n", clusterbest(i, 1), clusterbest(i, 2));
end
exercise_kmeans(clusterbest, k);Output
 
Exercise 6.1. Consider the k-median algorithm, with k = 3, for the set of patterns given:
- a) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 3).
- b) Run the algorithm starting from centroids x1 = (5, 7), x2 = (6, 3), x3 = (4, 4).
- c) Is it possible to improve the solutions obtained in a) and b)?