-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStats_Plotting_Learning_Data.py
113 lines (104 loc) · 3.75 KB
/
Stats_Plotting_Learning_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import matplotlib.pyplot as plt
import os
def get_num_revs(product_path):
    """Return the number of reviews stored for a product.

    The product file is read line by line and every line is counted as one
    review.

    Args:
        product_path: Path of the product's review file.

    Returns:
        The number of lines in the file (0 for an empty file).
    """
    with open(product_path, 'r') as handle:
        total = sum(1 for _ in handle)
    return total
def get_num_revs_avg_rating(product_path):
    """Count the reviews of a product and compute their average rating.

    Every line of the file is treated as one review. The line is split on
    tabs and the field at index 5 is parsed as the review's numeric rating.

    Args:
        product_path: Path of the product's review file.

    Returns:
        A tuple ``(count, avg_rating)``. For a file without any lines the
        result is ``(0, 0.0)`` instead of a ``ZeroDivisionError``.

    Raises:
        IndexError: If a line has fewer than six tab-separated fields.
        ValueError: If the field at index 5 is not a number.
    """
    count = 0
    rating_total = 0.0
    with open(product_path, 'r') as filep:
        for line in filep:
            rating_total += float(line.split('\t')[5])
            count += 1
    # An empty file has no reviews to average; avoid dividing by zero.
    if count == 0:
        return 0, 0.0
    return count, rating_total / count
def get_num_revs_original_case(source_category_path, productBaseDirectory):
    """Collect the review count of every product of a category, in file order.

    Each non-blank line of the category file names one product: the product
    id is the text before the first tab. The product's reviews are looked up
    in ``<productBaseDirectory>/<productid>.txt`` and counted with
    ``get_num_revs``.

    Args:
        source_category_path: Path of the category file listing product ids.
        productBaseDirectory: Directory that holds one ``<productid>.txt``
            review file per product (a trailing separator is optional).

    Returns:
        A tuple ``(x, y)`` of equally long lists: ``x`` holds the 1-based
        position of each product and ``y`` its number of reviews.
    """
    index = 0
    x = []
    y = []
    with open(source_category_path, 'r') as filep:
        for item in filep:
            # Strip the line ending first: on a line without tabs the id
            # would otherwise keep the newline and yield a bogus file name.
            productid = item.rstrip('\r\n').split('\t')[0]
            if not productid:
                # Blank line (or empty id): there is no product to look up.
                continue
            product_path = os.path.join(productBaseDirectory, productid + ".txt")
            num_reviews = get_num_revs(product_path)
            x.append(index + 1)
            print(index)  # progress output, one line per processed product
            y.append(num_reviews)
            index += 1
    return x, y
def get_num_revs_large_to_small_distribution(source_category_path, productBaseDirectory, threshold=70):
    """Collect review counts of a category, heavily reviewed products first.

    Each non-blank line of the category file names one product: the product
    id is the text before the first tab. The product's reviews are looked up
    in ``<productBaseDirectory>/<productid>.txt`` and counted with
    ``get_num_revs``. Products are then split into two groups, those with
    more than ``threshold`` reviews and those with ``threshold`` or fewer.
    The groups are not sorted internally; each keeps the category file order.

    Args:
        source_category_path: Path of the category file listing product ids.
        productBaseDirectory: Directory that holds one ``<productid>.txt``
            review file per product (a trailing separator is optional).
        threshold: Largest review count that still puts a product into the
            "small" group. Defaults to 70.

    Returns:
        A tuple ``(x, y)`` of equally long lists: ``x`` holds the 0-based
        position of each product in the output order (large group first,
        then small group) and ``y`` its number of reviews.
    """
    products_with_small_num_revs = []  # num_reviews <= threshold
    products_with_large_num_revs = []  # num_reviews > threshold
    with open(source_category_path, 'r') as filep:
        for item in filep:
            # Strip the line ending first: on a line without tabs the id
            # would otherwise keep the newline and yield a bogus file name.
            productid = item.rstrip('\r\n').split('\t')[0]
            if not productid:
                # Blank line (or empty id): there is no product to look up.
                continue
            product_path = os.path.join(productBaseDirectory, productid + ".txt")
            num_reviews = get_num_revs(product_path)
            if num_reviews <= threshold:
                products_with_small_num_revs.append((productid, num_reviews))
            else:
                products_with_large_num_revs.append((productid, num_reviews))

    x = []
    y = []
    ordered = products_with_large_num_revs + products_with_small_num_revs
    for index, (_, num_reviews) in enumerate(ordered):
        x.append(index)
        y.append(num_reviews)
        print(index)  # progress output, one line per emitted product
    return x, y
def get_num_revs_distribution_per_Category(num_revs_base_directory, threshold=50):
    """Report, per category file, how many products exceed a review threshold.

    Every regular file in ``num_revs_base_directory`` is read line by line.
    A line is split on tabs and the field at index 1 is parsed as the
    product's review count. For each file one line of the form
    ``<file name> num_greater <n>`` is printed.

    Args:
        num_revs_base_directory: Directory with one review-count file per
            category (a trailing separator is optional).
        threshold: Products with strictly more reviews than this are counted
            as "greater"; all others as "less". Defaults to 50.

    Returns:
        A dict mapping each file name to ``(num_greater, num_less)``.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the field at index 1 is not an integer.
    """
    distribution = {}
    for file_name in os.listdir(num_revs_base_directory):
        file_path = os.path.join(num_revs_base_directory, file_name)
        if not os.path.isfile(file_path):
            # os.listdir also returns sub-directories, which cannot be opened.
            continue
        num_greater = 0
        num_less = 0
        with open(file_path, 'r') as filep:
            for item in filep:
                if not item.strip():
                    # A blank line carries no review count.
                    continue
                line = item.split('\t')
                if int(line[1]) > threshold:
                    num_greater += 1
                else:
                    num_less += 1
        print(file_name + " num_greater " + str(num_greater))
        distribution[file_name] = (num_greater, num_less)
    return distribution
# Disabled example, kept for reference: scatter-plot the review counts of one
# category, products with many reviews first. Paths use forward slashes so
# that re-enabling the code does not introduce invalid "\Y" / "\c" escapes.
#
# category_name = "Industrial & Scientific"
# source_category_path = "d:/Yassien_PhD/categories/" + category_name + ".txt"
# productBaseDirectory = "d:/Yassien_PhD/Product_Reviews/"
# x, y = get_num_revs_large_to_small_distribution(source_category_path, productBaseDirectory)
# # plt.scatter(x, y, alpha=0.5)
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.set_xlabel('Products')
# ax.set_ylabel('Num Reviews')
# ax.scatter(x, y)
# fig.suptitle(category_name, fontsize=14, fontweight='bold')
# plt.show()

# Run the per-category report only when the file is executed as a script, so
# importing the module for its helpers does not scan a hard-coded directory.
if __name__ == "__main__":
    get_num_revs_distribution_per_Category("D:/Yassien_PhD/Number_of_reviews_per_product/")