-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathleskAlgorithm.py
70 lines (62 loc) · 2.85 KB
/
leskAlgorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#Author: Nurendra Choudhary.
#Algorithm Reference: http://en.wikipedia.org/wiki/Lesk_algorithm
from nltk.corpus import wordnet
from nltk.tokenize.punkt import PunktWordTokenizer
import sys
# Closed-class ("function") words that carry little lexical content. The list
# keeps its original order and its repeated entries; the groupings below are
# only a reading aid.
functionwords = [
    # Prepositions
    'about', 'across', 'against', 'along', 'around', 'at', 'behind',
    'beside', 'besides', 'by', 'despite', 'down', 'during', 'for', 'from',
    'in', 'inside', 'into', 'near', 'of', 'off', 'on', 'onto', 'over',
    'through', 'to', 'toward', 'with', 'within', 'without',
    # Pronouns, determiners and quantifiers
    'anything', 'everything', 'anyone', 'everyone', 'ones', 'such', 'it',
    'itself', 'something', 'nothing', 'someone', 'the', 'some', 'this',
    'that', 'every', 'all', 'both', 'one', 'first', 'other', 'next', 'many',
    'much', 'more', 'most', 'several', 'no', 'a', 'an', 'any', 'each', 'no',
    'half', 'twice', 'two', 'second', 'another', 'last', 'few', 'little',
    'less', 'least', 'own',
    # Conjunctions and wh-words
    'and', 'but', 'after', 'when', 'as', 'because', 'if', 'what', 'where',
    'which', 'how', 'than', 'or', 'so', 'before', 'since', 'while',
    'although', 'though', 'who', 'whose',
    # Modal and auxiliary verbs
    'can', 'may', 'will', 'shall', 'could', 'be', 'do', 'have', 'might',
    'would', 'should', 'must',
    # Adverbs and connectives
    'here', 'there', 'now', 'then', 'always', 'never', 'sometimes',
    'usually', 'often', 'therefore', 'however', 'besides', 'moreover',
    'though', 'otherwise', 'else', 'instead', 'anyway', 'incidentally',
    'meanwhile',
]
def overlapcontext(synset, sentence):
    """Count the content words shared by a synset's gloss and a context.

    The gloss is the tokenized definition of ``synset`` together with the
    tokens of each of its usage examples. Words listed in ``functionwords``
    are removed from both the gloss and the context before intersecting.

    Args:
        synset: object exposing ``definition()`` and ``examples()``.
        sentence: the context, either a space-separated string or a list,
            tuple, set or frozenset of words.

    Returns:
        The number of distinct words common to the gloss and the context,
        or 0 when ``sentence`` is of an unsupported type.
    """
    tokenizer = PunktWordTokenizer()
    gloss = set(tokenizer.tokenize(synset.definition()))
    for example in synset.examples():
        # set.union() returns a new set (the result was being discarded) and
        # would iterate a raw string character by character, so tokenize the
        # example and merge it into the gloss in place.
        gloss.update(tokenizer.tokenize(example))
    gloss = gloss.difference(functionwords)

    if isinstance(sentence, str):
        context = set(sentence.split(" "))
    elif isinstance(sentence, (list, tuple, set, frozenset)):
        context = set(sentence)
    else:
        # Unsupported context type: report "no overlap" as a number so that
        # callers can keep adding and comparing scores.
        return 0

    context = context.difference(functionwords)
    return len(gloss.intersection(context))
def lesk(word, sentence):
    """Pick the WordNet sense of ``word`` that best matches ``sentence``.

    Each candidate sense is scored by ``overlapcontext`` against the context,
    and the scores of its direct hyponyms are added to it. The sense with the
    strictly highest score wins; on ties the earlier sense is kept.

    Args:
        word: the word to disambiguate; it is reduced to its WordNet base
            form when ``wordnet.morphy`` finds one.
        sentence: the context, passed through unchanged to
            ``overlapcontext``.

    Returns:
        The best-scoring synset, or None when no sense has a positive
        overlap with the context.
    """
    # Look the base form up once; keep the surface form if there is none.
    base_form = wordnet.morphy(word)
    if base_form is not None:
        word = base_form

    bestsense = None
    maxoverlap = 0
    for sense in wordnet.synsets(word):
        # A missing (None) score counts as zero so that the running total and
        # the comparison below always operate on numbers.
        overlap = overlapcontext(sense, sentence) or 0
        for hyponym in sense.hyponyms():
            overlap += overlapcontext(hyponym, sentence) or 0
        if overlap > maxoverlap:
            maxoverlap = overlap
            bestsense = sense
    return bestsense
sentence = raw_input("Enter the Sentence (or) Context :")
word = raw_input("Enter the word :")
a = lesk(word,sentence)
print "\n\nSynset:",a
if a is not None:
print "Meaning:",a.definition()
num=0
print "\nExamples:"
for i in a.examples():
num=num+1
print str(num)+'.'+')',i