Skip to content
This repository was archived by the owner on Jun 8, 2021. It is now read-only.

Commit

Permalink
python scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
matildao-pane committed Jul 11, 2020
1 parent 387e88d commit 869362a
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 187 deletions.
70 changes: 0 additions & 70 deletions scripts/benchmark.ipynb

This file was deleted.

58 changes: 58 additions & 0 deletions scripts/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from sklearn.cluster import KMeans
import numpy as np
import time
from statistics import mean
import pandas as pd


# Benchmark scikit-learn's KMeans on a comma-separated dataset file, print the
# elapsed time, the centroids and the iteration count, then scatter-plot the
# points (black) together with the fitted centroids (red).

points = []
clusters = 4

# The timer starts before the file is read, so the reported figure covers both
# dataset loading and the k-means fit. perf_counter() is monotonic, unlike
# time.time(), so the difference cannot be skewed by system clock changes.
start_milli_time = time.perf_counter() * 1000
with open("dataset_2d_4centr.txt", "r") as file:
    for line in file:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # inside float('').
            continue
        points.append([float(comp) for comp in line.split(",")])

dataset = np.array(points)
# `precompute_distances` is intentionally not passed: it was deprecated in
# scikit-learn 0.23 and removed in 1.0, where it raises a TypeError.
kmeans = KMeans(n_clusters=clusters, init='random', random_state=0).fit(dataset)

end_milli_time = time.perf_counter() * 1000
execution_time = round(end_milli_time - start_milli_time, 4)

# Optional: append the results to a log file instead of only printing them.
#with open('output_2d.txt', 'a') as f:
#    f.write("execution time: " + str(execution_time) + ' ms \n')
#    f.write('centroids:\n'+ str(kmeans.cluster_centers_) + '\n')
#    f.write('n_iter: ' + str(kmeans.n_iter_) + '\n\n')

print(execution_time)
print(kmeans.cluster_centers_)
print(kmeans.n_iter_)

# Plot black points and red centroids.
cent = kmeans.cluster_centers_
centr = np.array(cent)

# Stack the centroids below the data points so both end up in one frame.
c = np.vstack([dataset, centr])

# Only the first two coordinates are plotted.
df = pd.DataFrame({'x': c[:, 0], 'y': c[:, 1]})

# One colour per row: derive the counts from the actual data rather than
# hard-coding 1000 points / 4 centroids, so any dataset size works.
labels = ['black'] * len(dataset) + ['red'] * len(centr)

df['label'] = labels

print(df)

# NOTE(review): there is no pyplot.show() call here; when run as a plain script
# (not a notebook) the figure may never be displayed - confirm intended usage.
df.plot(x='x', y='y', c=df['label'], kind='scatter')
117 changes: 0 additions & 117 deletions scripts/dataset_gen.ipynb

This file was deleted.

37 changes: 37 additions & 0 deletions scripts/dataset_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from sklearn.datasets import make_blobs
import pandas as pd
import numpy as np
from pandas.plotting._matplotlib import scatter_matrix
from matplotlib import pyplot
from pandas import DataFrame

# Generate a synthetic clustered dataset with make_blobs, write it to a
# comma-separated text file (one point per line, values rounded to 4 decimals),
# and plot it: first as a scatter matrix, then as a scatter plot coloured by
# the generating cluster.

dimension = 2
samples = 1000
centers = 4

points, y = make_blobs(n_samples=samples, centers=centers, n_features=dimension)

with open("dataset_2d_4centr.txt", "w") as file:
    for point in points:
        # join() puts the separators only between values, which replaces the
        # manual "comma unless last coordinate" bookkeeping.
        file.write(",".join(str(round(value, 4)) for value in point) + "\n")

data = np.array(points)

# Scatter matrix of all features. Column names are derived from `dimension`
# so that changing it does not break the DataFrame construction.
df = pd.DataFrame(data, columns=['x_{}'.format(i) for i in range(dimension)])
scatter_matrix(df, alpha=0.2, figsize=(10, 10))

# Scatter plot of the first two features, one colour per generating cluster.
# This indexes columns 0 and 1, so it needs at least two features.
df = DataFrame(dict(x=points[:, 0], y=points[:, 1], label=y))
colors = {0:'red', 1:'blue', 2:'green', 3:'black', 4:'purple', 5:'pink', 6:'orange'}
fig, ax = pyplot.subplots()
grouped = df.groupby('label')
for key, group in grouped:
    # Wrap around the palette so more clusters than colours reuses colours
    # instead of raising KeyError.
    group.plot(ax=ax, kind='scatter', x='x', y='y', label=key,
               color=colors[key % len(colors)])
pyplot.show()

0 comments on commit 869362a

Please sign in to comment.