-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtweets_sentiment_predict_and_plot_pie.py
64 lines (46 loc) · 1.52 KB
/
tweets_sentiment_predict_and_plot_pie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import matplotlib.pyplot as plt
import os
import csv
import pandas as pd
import numpy as np
from sklearn import *
from scipy import stats
import pickle
def read_csv_data(fname):
    """Read the text column (second field) of every row in a CSV file.

    No header handling is done: every row that has a second field
    contributes that field, in file order.

    Args:
        fname: Path of a comma-separated, UTF-8 encoded file.

    Returns:
        A ``(txtdata, classes)`` tuple. ``txtdata`` is the list of second
        fields. ``classes`` is always an empty list; nothing is appended to
        it here.
    """
    txtdata = []
    classes = []
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires (matters for quoted fields with line breaks).
    with open(fname, 'r', encoding='UTF-8', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in reader:
            # Blank lines come back as [] and short rows have no text
            # column; skip them instead of raising IndexError.
            if len(row) < 2:
                continue
            txtdata.append(row[1])
    return (txtdata, classes)
def write_csv_kaggle_sub(fname, Y):
    """Write predictions to a two-column ``Id,Prediction`` CSV file.

    Args:
        fname: Path of the CSV file to create (overwritten if it exists).
        Y: Iterable of predictions; each one is paired with a 1-based Id
            in iteration order.
    """
    # Assemble the whole table first so the file is only opened once the
    # predictions have been fully consumed.
    table = [['Id', 'Prediction']]
    table.extend([row_id, label] for row_id, label in enumerate(Y, start=1))

    with open(fname, 'w', newline='') as out:
        csv.writer(out).writerows(table)
cntvect = pickle.load(open('vectorizer.pickle', 'rb'))
model = pickle.load(open('classification.model', 'rb'))
path = ""
test = "tom"
filename = "predict2_" + test + ".csv"
(testtxt, _) = read_csv_data("result_" + test + ".csv")
testXbow = cntvect.transform(testtxt)
predY = model.best_estimator_.predict(testXbow)
write_csv_kaggle_sub(filename, predY)
df = pd.read_csv(os.path.join(path, filename))
freq = df.groupby(df['Prediction']).count().unstack()
print (freq)
label = ["neutral", "negative", "positive"]
explode = [0, 0, 0.15]
plt.pie(freq, labels = label, explode = explode,
autopct='%1.1f%%',
wedgeprops={'edgecolor': 'black'})
plt.title(test.upper())
plt.show()