|
| 1 | +#!/usr/bin/python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# |
| 4 | +# Copyright @2022 AI, ZHIHU Inc. (zhihu.com) |
| 5 | +# |
| 6 | +# @author: lifeiyang <[email protected]> |
| 7 | +# @date: 2022/11/24 |
| 8 | +# |
| 9 | +"""""" |
| 10 | +import math |
| 11 | + |
| 12 | + |
class Solution:
    """Minimal 2-D k-means clustering over points given as ``[x, y]`` pairs."""

    def distance(self, node1, node2):
        """Return the Euclidean distance between two 2-D points.

        Only the first two coordinates of each point are used.
        """
        return math.hypot(node1[0] - node2[0], node1[1] - node2[1])

    def point_mean(self, point_list):
        """Return the centroid ``(mean_x, mean_y)`` of ``point_list``.

        Raises:
            ZeroDivisionError: if ``point_list`` is empty.
        """
        count = len(point_list)
        mean_x = sum(point[0] for point in point_list) / count
        mean_y = sum(point[1] for point in point_list) / count
        return (mean_x, mean_y)

    def _assign_points(self, data_list, centroids):
        """Group every point of ``data_list`` under its nearest centroid.

        Returns a dict mapping each centroid tuple to the list of points
        assigned to it (in ``data_list`` order); centroids that attract no
        point map to an empty list.
        """
        clusters = {centroid: [] for centroid in centroids}
        for point in data_list:
            # min() returns the first minimal item, so on a distance tie the
            # centroid that was inserted first wins.
            nearest = min(
                clusters,
                key=lambda centroid: self.distance(point, centroid),
            )
            clusters[nearest].append(point)
        return clusters

    def kmeans(self, data_list, k):
        """Cluster ``data_list`` into at most ``k`` groups with Lloyd's algorithm.

        The first ``k`` distinct points of ``data_list`` seed the centroids.
        Points are then repeatedly reassigned to the nearest centroid and the
        centroids recomputed as cluster means until nothing changes. Progress
        is printed after every iteration.

        Args:
            data_list: sized, iterable collection of 2-D points (``[x, y]``).
            k: requested number of clusters.

        Returns:
            A dict mapping each final centroid ``(x, y)`` to the list of
            points assigned to it. A cluster that lost all of its points
            keeps its last centroid and maps to an empty list. Fewer than
            ``k`` entries are returned when ``data_list`` holds fewer than
            ``k`` distinct points.

        Raises:
            ValueError: if ``k`` exceeds the number of points, or if ``k`` is
                smaller than 1 while ``data_list`` is non-empty.
        """
        point_count = len(data_list)
        if k > point_count:
            raise ValueError("k must not exceed the number of data points")
        if point_count > 0 and k < 1:
            raise ValueError("k must be at least 1 for non-empty data")

        # 1. Seed the centroids with the first k distinct points; duplicates
        #    are skipped because identical seeds would collapse into a single
        #    dictionary key and silently reduce the number of clusters.
        centroids = []
        for point in data_list:
            if len(centroids) >= k:
                break
            seed = tuple(point)
            if seed not in centroids:
                centroids.append(seed)

        # 2. Initial assignment of every point to its nearest seed.
        clusters = self._assign_points(data_list, centroids)

        # 3. Iterate until centroids and assignments stop changing.
        while True:
            # An empty cluster has no mean; keep its previous centroid rather
            # than dividing by zero.
            centroids = [
                self.point_mean(members) if members else centroid
                for centroid, members in clusters.items()
            ]
            updated = self._assign_points(data_list, centroids)

            if updated == clusters:
                print("final kmeans:", updated)
                return updated

            clusters = updated
            print("now kmeans:", updated)
| 65 | + |
| 66 | + |
if __name__ == "__main__":
    # Demo run: cluster four sample points into two groups and print
    # whatever kmeans() returns.
    sample_points = [[0, 5], [0, 6], [4, 0], [5, 0]]
    cluster_count = 2
    print(Solution().kmeans(sample_points, cluster_count))
0 commit comments