-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPR.py
204 lines (175 loc) · 7.57 KB
/
PR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import math
import io
# Stop words: graph nodes created or revisited for these words are given a
# frequency of 0 so they never add weight to a candidate phrase.
# NOTE: membership is case-sensitive and 'a' is listed twice; the contents
# and order are kept exactly as they were.
english_stop = [
    # articles
    'A', 'The', 'a', 'a', 'an', 'the',
    # conjunctions
    'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while',
    # prepositions
    'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
    'into', 'through', 'during', 'before', 'after', 'above', 'below',
    'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under',
    # adverbs
    'again', 'further', 'then', 'once', 'here', 'there',
    'when', 'where', 'why', 'how',
    # quantifiers
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other',
    'some', 'such',
    # negations / degree words
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
]
def read_tweets(filename):
    """Read a UTF-8 encoded text file and return its lines.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one unicode string per line of the file. Line endings
        are kept, exactly as ``readlines`` returns them.

    Raises:
        IOError/OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    # The context manager closes the handle as soon as the lines are read,
    # even if decoding fails; previously the handle was left open until the
    # garbage collector happened to reclaim it.
    with io.open(filename, encoding='utf-8') as handle:
        return handle.readlines()
class Node:
    """One word (or root phrase) in the phrase graph.

    Attributes:
        word: The text this node stands for.
        freqency: Numeric weight of the node (spelling kept, since other
            code in this file reads and writes the attribute by this name).
        incoming: List of Node objects linked on the incoming side.
        outgoing: List of Node objects linked on the outgoing side.
    """

    def __init__(self, word, freqency=1.0):
        self.word, self.freqency = word, freqency
        # Each instance gets its own fresh neighbour lists.
        self.incoming, self.outgoing = [], []

    def get_incoming_node(self, word):
        """Return the first incoming neighbour whose word equals ``word``.

        Returns ``False`` (not ``None``) when there is no such neighbour.
        """
        return next(
            (neighbour for neighbour in self.incoming if neighbour.word == word),
            False,
        )

    def get_outgoing_node(self, word):
        """Return the first outgoing neighbour whose word equals ``word``.

        Returns ``False`` (not ``None``) when there is no such neighbour.
        """
        return next(
            (neighbour for neighbour in self.outgoing if neighbour.word == word),
            False,
        )

    def __repr__(self):
        # A node prints as its bare word.
        return self.word
def prepend_nodes(node, initial, index):
    """Thread the words that precede the root phrase onto the incoming side.

    The words are visited from the one nearest the root outwards (i.e.
    ``initial`` reversed). At each step the walk moves to the incoming
    neighbour carrying the word, creating that neighbour when it is missing.

    Args:
        node: Node to start from (the root of the graph).
        initial: List of words found before the root phrase in one tweet.
        index: Position of that tweet in the module-level ``age``,
            ``reput``, ``activity`` and ``retweet_count`` lists, which
            ``computeStuff`` fills in before the graph is built.

    Returns:
        None. The graph reachable from ``node`` is modified in place.
    """
    cursor = node
    for token in reversed(initial):
        found = cursor.get_incoming_node(token)

        if not found:
            # First time this word shows up at this position: link a fresh
            # node (default frequency) or a zero-weight one for stop words.
            found = Node(token)
            if token in english_stop:
                found.freqency = 0
            cursor.incoming.append(found)
        elif token in english_stop:
            # Stop words never accumulate weight.
            found.freqency = 0.0
        else:
            # Reinforce an existing word with a weight derived from the
            # tweet's metadata:
            #   log(age) * sqrt(reput) * log(activity) * sqrt(1 + retweets)
            age_term = float(math.log(float(age[index])))
            reputation_term = float(math.sqrt(float(reput[index])))
            activity_term = float(math.log(float(activity[index])))
            retweet_term = float(math.sqrt(1 + float(retweet_count[index])))
            boost = age_term * reputation_term * activity_term * retweet_term
            found.freqency += boost

        cursor = found
def append_nodes(node, following, index):
    """Thread the words that follow the root phrase onto the outgoing side.

    The words are visited in reading order. At each step the walk moves to
    the outgoing neighbour carrying the word, creating that neighbour when
    it is missing.

    Args:
        node: Node to start from (the root of the graph).
        following: List of words found after the root phrase in one tweet.
        index: Position of that tweet in the module-level ``age``,
            ``reput``, ``activity`` and ``retweet_count`` lists, which
            ``computeStuff`` fills in before the graph is built.

    Returns:
        None. The graph reachable from ``node`` is modified in place.
    """
    cursor = node
    for token in following:
        found = cursor.get_outgoing_node(token)

        if not found:
            # First time this word shows up at this position: link a fresh
            # node (default frequency) or a zero-weight one for stop words.
            found = Node(token)
            if token in english_stop:
                found.freqency = 0
            cursor.outgoing.append(found)
        elif token in english_stop:
            # Stop words never accumulate weight.
            found.freqency = 0.0
        else:
            # Reinforce an existing word. Unlike the incoming side, the
            # reputation is used as-is, the retweet count is damped with a
            # fourth root, and only a tenth of the product is added:
            #   log(age) * reput * log(activity) * (1 + retweets) ** 0.25 / 10
            age_term = float(math.log(float(age[index])))
            reputation_term = float(reput[index])
            activity_term = float(math.log(float(activity[index])))
            retweet_term = float(
                math.sqrt(float(math.sqrt(1 + float(retweet_count[index]))))
            )
            boost = age_term * reputation_term * activity_term * retweet_term
            found.freqency += float(boost / 10.0)

        cursor = found
def make_graph(tweets, root_phrase):
    """Build the phrase graph rooted at ``root_phrase``.

    Every tweet containing the phrase is cut at the first occurrence; the
    words before it are threaded onto the root's incoming side and the
    words after it onto the outgoing side.

    Args:
        tweets: Sequence of tweet strings. A tweet's position in this
            sequence is the index handed to ``prepend_nodes`` and
            ``append_nodes`` for their per-tweet metadata lookups.
        root_phrase: Phrase to anchor the graph on.

    Returns:
        The root Node; its own frequency is 0.
    """
    root = Node(root_phrase)
    root.freqency = 0

    phrase_length = len(root_phrase)
    # The position advances for every tweet, matching or not, so it stays
    # aligned with the metadata lists.
    for position, tweet in enumerate(tweets):
        cut = tweet.find(root_phrase)
        if cut == -1:
            continue
        words_before = tweet[:cut].split()
        words_after = tweet[cut + phrase_length:].split()
        prepend_nodes(root, words_before, position)
        append_nodes(root, words_after, position)

    return root
def maximux_path_outgoing(root_node):
    """Find the heaviest path from ``root_node`` through outgoing links.

    Args:
        root_node: Node to start from; needs ``word``, ``freqency`` and
            ``outgoing`` attributes.

    Returns:
        A ``(weight, phrase)`` tuple: the summed ``freqency`` along the best
        path down to a node without outgoing links, and the words on that
        path joined by single spaces, starting with ``root_node.word``.
        When several children tie, the earliest one in ``outgoing`` wins.
    """
    children = root_node.outgoing
    if len(children) == 0:
        # Leaf: the path is just this node.
        return (root_node.freqency, root_node.word)

    # max() keeps the first of equally heavy candidates.
    best_weight, best_phrase = max(
        (maximux_path_outgoing(child) for child in children),
        key=lambda candidate: candidate[0]
    )
    return (root_node.freqency + best_weight, root_node.word + ' ' + best_phrase)
def maximux_path_incoming(root_node):
    """Find the heaviest path from ``root_node`` through incoming links.

    Args:
        root_node: Node to start from; needs ``word``, ``freqency`` and
            ``incoming`` attributes.

    Returns:
        A ``(weight, phrase)`` tuple: the summed ``freqency`` along the best
        path out to a node without incoming links, and the words on that
        path joined by single spaces, starting with ``root_node.word`` (so
        the words appear in walk order, nearest to the root first).
        When several neighbours tie, the earliest one in ``incoming`` wins.
    """
    if not root_node.incoming:
        # End of the chain: the path is just this node.
        return (root_node.freqency, root_node.word)

    best = None
    for neighbour in root_node.incoming:
        candidate = maximux_path_incoming(neighbour)
        # Strict comparison so the first of equally heavy candidates stays.
        if best is None or candidate[0] > best[0]:
            best = candidate

    best_weight, best_phrase = best
    return (root_node.freqency + best_weight, root_node.word + ' ' + best_phrase)
def computeStuff(selected_topic, filename='./Data/fetchedTweets.txt'):
    """Produce two candidate summary phrases built around ``selected_topic``.

    The tweet file holds one tweet per line with five tab-separated fields:
    age, reputation, activity, retweet count and the tweet text. The fields
    are stored in the module-level lists ``age``, ``reput``, ``activity``,
    ``retweet_count`` and ``text``, which ``prepend_nodes`` and
    ``append_nodes`` read while graphs are being built.

    Steps:
      1. Build a graph rooted at the topic and take the heaviest phrase
         following it (``str1``) and the heaviest phrase leading up to it
         (``pre_pure``).
      2. Re-root a graph at each of those two phrases and keep, for each,
         either the phrase itself (when the incoming side outweighs the
         outgoing side) or its heaviest outgoing extension.

    Args:
        selected_topic: Phrase to summarise around.
        filename: Path of the tab-separated tweet file. Defaults to the
            location that used to be hard-coded.

    Returns:
        A list of two strings: the summary grown from the forward phrase,
        then the summary grown from the backward phrase.

    Raises:
        ValueError: If a non-blank line has fewer than five fields.
    """
    global age, reput, activity, retweet_count, text
    age = []
    reput = []
    activity = []
    retweet_count = []
    text = []

    for line in read_tweets(filename):
        # Blank lines (e.g. a trailing newline at the end of the file)
        # carry no tweet; skipping them keeps the five lists aligned.
        if not line.strip():
            continue
        # maxsplit=4 keeps any tab inside the tweet text with the text
        # instead of failing the five-way unpack.
        age_obj, reput_obj, activity_obj, retweet_count_obj, text_obj = line.split("\t", 4)
        age.append(age_obj)
        reput.append(reput_obj)
        activity.append(activity_obj)
        retweet_count.append(retweet_count_obj)
        text.append(text_obj)

    root_node = make_graph(text, selected_topic)
    _, str0 = maximux_path_incoming(root_node)
    _, str1 = maximux_path_outgoing(root_node)

    # str0 lists the root first and then the preceding words walking away
    # from it; drop the root's words and flip the rest into reading order.
    leading_words = str0.split()[len(root_node.word.split()):]
    pre_pure = " ".join(reversed(leading_words))
    # Only insert the separator when there is a prefix; otherwise the
    # re-rooted phrase would begin with a stray space.
    pre_pure = pre_pure + " " + selected_topic if pre_pure else selected_topic

    res = []
    # 1st partial summary grows from the forward phrase, 2nd from the
    # backward phrase. Both graphs are built from the parsed tweet text
    # (not the raw lines, whose metadata columns would otherwise be taken
    # for words) and each comparison uses the graph that was just built.
    for phrase in (str1, pre_pure):
        new_root_node = make_graph(text, phrase)
        incoming_weight, _ = maximux_path_incoming(new_root_node)
        outgoing_weight, outgoing_phrase = maximux_path_outgoing(new_root_node)
        if incoming_weight > outgoing_weight:
            res.append(new_root_node.word)
        else:
            res.append(outgoing_phrase)
    return res