python scripts

seraogianluca · Jul 11, 2020 · 869362a · 869362a
1 parent 387e88d
commit 869362a
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 187 deletions.
diff --git a/scripts/benchmark.ipynb b/scripts/benchmark.ipynb
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
@@ -0,0 +1,58 @@
+from sklearn.cluster import KMeans
+import numpy as np
+import time
+from statistics import mean
+import pandas as pd
+
+
+# Benchmark configuration: input file produced by dataset_gen.py and the
+# number of clusters k-means is asked to find.
+dataset_path = "dataset_2d_4centr.txt"
+clusters = 4
+
+# The timed section covers both parsing the dataset and fitting the model.
+# perf_counter() is monotonic, so the measured interval cannot be skewed by
+# wall-clock adjustments the way time.time() can.
+start_time = time.perf_counter()
+points = []
+with open(dataset_path, "r") as file:
+    for line in file:
+        line = line.strip()
+        if not line:
+            # Tolerate blank lines (e.g. a trailing newline at end of file).
+            continue
+        points.append([float(value) for value in line.split(",")])
+
+dataset = np.array(points)
+# precompute_distances is omitted: it is deprecated as of scikit-learn 0.23
+# and removed in 1.0, where passing it raises a TypeError.
+kmeans = KMeans(n_clusters=clusters, init='random', random_state=0).fit(dataset)
+
+# Elapsed time in milliseconds, rounded to 4 decimal places.
+execution_time = round((time.perf_counter() - start_time) * 1000, 4)
+
+# Optional: append the results to a log file instead of printing them.
+#with open('output_2d.txt', 'a') as f:
+#    f.write("execution time: " + str(execution_time) + ' ms \n')
+#    f.write('centroids:\n'+ str(kmeans.cluster_centers_) + '\n')
+#    f.write('n_iter: ' + str(kmeans.n_iter_) + '\n\n')
+
+print(str(execution_time))
+print(str(kmeans.cluster_centers_))
+print(str(kmeans.n_iter_))
+
+# Plot the data points in black and the fitted centroids in red.
+centr = kmeans.cluster_centers_
+
+# Stack the centroids below the data points so one DataFrame holds both.
+c = np.vstack([dataset, centr])
+
+df = pd.DataFrame({'x': c[:, 0], 'y': c[:, 1]})
+
+# One colour per row, sized from the actual arrays rather than hard-coded
+# counts, so the script keeps working if the dataset or `clusters` changes.
+df['label'] = ['black'] * len(dataset) + ['red'] * len(centr)
+
+print(df)
+
+df.plot(x='x', y='y', c=df['label'], kind='scatter')
diff --git a/scripts/dataset_gen.ipynb b/scripts/dataset_gen.ipynb
diff --git a/scripts/dataset_gen.py b/scripts/dataset_gen.py
@@ -0,0 +1,37 @@
+from sklearn.datasets import make_blobs
+import pandas as pd
+import numpy as np
+from pandas.plotting._matplotlib import scatter_matrix
+from matplotlib import pyplot
+from pandas import DataFrame
+
+# Dataset parameters: feature count, number of points, number of blobs.
+dimension = 2
+samples = 1000
+centers = 4
+
+# points is a (samples, dimension) array; y holds each point's blob index.
+points, y = make_blobs(n_samples=samples, centers=centers, n_features=dimension)
+
+# Write one point per line as comma-separated values rounded to 4 decimals.
+with open("dataset_2d_4centr.txt", "w") as file:
+    for point in points:
+        file.write(",".join(str(round(value, 4)) for value in point) + "\n")
+
+# Scatter-plot matrix of every feature pair. Column names follow `dimension`
+# instead of being fixed to two, and the public pd.plotting entry point is
+# used rather than the private pandas.plotting._matplotlib module.
+feature_names = ['x_' + str(i) for i in range(dimension)]
+df = pd.DataFrame(points, columns=feature_names)
+pd.plotting.scatter_matrix(df, alpha=0.2, figsize=(10,10))
+
+# Scatter plot of the first two features, one colour per generating blob.
+# The palette has 7 entries, so `centers` above 7 needs more colours.
+df = DataFrame(dict(x=points[:,0], y=points[:,1], label=y))
+colors = {0:'red', 1:'blue', 2:'green', 3:'black', 4:'purple', 5:'pink', 6:'orange'}
+fig, ax = pyplot.subplots()
+for key, group in df.groupby('label'):
+    group.plot(ax=ax, kind='scatter', x='x', y='y', label=key,
+               color=colors[key])
+# Display all open figures.
+pyplot.show()