-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRoleDictionary.py
84 lines (61 loc) · 2.7 KB
/
RoleDictionary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import re
from ClusterSImilarity import FuzzyClusterSimilarity
import pprint
class RoleDictionary:
    """Map actor names to the set of role codes found in the actor files.

    Every file listed in ``actor_filenames`` (looked up under ``folder``) is
    read line by line.  Lines are split on tabs and each token is classified
    by its first character:

    * a token starting with ``+`` is an additional name for the actor group
      currently being read;
    * a token starting with ``[`` is a bracketed role entry; only the first
      space-delimited word inside the brackets is kept as the role code;
    * any other token starts a new actor group.

    Underscores in names are replaced by spaces.  Every name of a group is
    mapped to the roles collected for that group; if a name shows up in
    several groups, the role sets are merged.
    """

    actor_filenames = ['Phoenix.Countries.actors.txt',
                       'Phoenix.International.actors.txt',
                       'Phoenix.MilNonState.actors.txt']
    folder = 'data/dictionaries'
    # Class-level placeholders kept for backward compatibility; the parsed
    # data lives on the instance (see __init__), so these are never mutated.
    actor_set = set()
    actor_roles = {}
    # The default measure is created lazily in __init__ rather than at
    # class-definition time.
    similarityMeasure = None

    # Matches a leading "[...]" role entry; compiled once for all lines.
    _ROLE_TOKEN = re.compile(r'\[[^\]]*\]')

    def __init__(self, similarityMeasure=None):
        """Load every actor file into ``self.actor_roles``.

        similarityMeasure -- object stored on the instance; when omitted a
            fresh ``FuzzyClusterSimilarity()`` is created.  It is not used by
            the exact-match lookup in ``roles``.

        Raises whatever ``open`` raises if an actor file cannot be read.
        """
        if similarityMeasure is None:
            similarityMeasure = FuzzyClusterSimilarity()
        self.similarityMeasure = similarityMeasure
        # Per-instance mapping so that separate instances do not share state.
        self.actor_roles = {}
        for filename in self.actor_filenames:
            self._load_file(self.folder + "/" + filename)

    def _load_file(self, path):
        """Parse one actor file and merge its groups into ``actor_roles``."""
        current_actors = []
        current_roles = set()
        with open(path) as fs:
            for line in fs:
                line = line.strip()
                if not line or line.startswith('#'):  # blank or comment line
                    continue
                line = line.split('#')[0]  # drop a trailing comment
                for token in line.strip().split("\t"):
                    token = token.strip()
                    if not token:
                        # Consecutive or trailing tabs yield empty tokens;
                        # they must not be mistaken for a new actor name.
                        continue
                    if token.startswith('+'):
                        alias = token.replace('+', '').replace('_', ' ')
                        current_actors.append(alias.strip())
                    elif token.startswith('['):
                        matched = self._ROLE_TOKEN.match(token)
                        if matched is None:
                            # No closing bracket: skip the malformed entry
                            # instead of failing on matched.group().
                            continue
                        inner = matched.group(0)[1:-1]
                        current_roles.add(inner.split(' ')[0])
                    else:
                        # A new primary name closes the previous group.
                        self._store_group(current_actors, current_roles)
                        current_actors = [token.replace('_', ' ').strip()]
                        current_roles = set()
        # The last group of the file has no following actor to trigger the
        # flush above, so store it explicitly.
        self._store_group(current_actors, current_roles)

    def _store_group(self, actors, roles):
        """Merge ``roles`` into the entry of every name in ``actors``."""
        for actor in actors:
            # Each name gets its own set (no aliasing between names), and
            # update() merges in place, unlike union() which returns a copy.
            self.actor_roles.setdefault(actor, set()).update(roles)

    def roles(self, actorname):
        """Return ``{normalized_name: roles}`` for an exact name match.

        ``actorname`` is normalized by replacing underscores with spaces and
        stripping surrounding whitespace.  The value is the set of role codes
        stored for that name, or ``None`` when the name is unknown.
        """
        name = actorname.replace('_', ' ').strip()
        return {name: self.actor_roles.get(name)}
# Smoke run executed when the module is loaded: build the dictionary from the
# actor files and print the roles stored for one sample actor.
# Single-argument print(...) calls produce the same output on Python 2 and
# are also valid syntax on Python 3.
# NOTE(review): this runs on import as well; consider moving it behind an
# ``if __name__ == "__main__":`` guard once no caller relies on it.
print('Running')
roleDict = RoleDictionary()
print("initialized")
print(roleDict.roles('BARACK_OBAMA'))