-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspellchecker.py~
More file actions
83 lines (71 loc) · 3.19 KB
/
spellchecker.py~
File metadata and controls
83 lines (71 loc) · 3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys
from copy import copy
from time import time
from Engine.Classifier.QueryClassifier import QueryClassifier
from Engine.ErrorModel import ErrorModel
from Engine.Generators.GrammarGenerator import GrammarGenerator
from Engine.Generators.JoinGenerator import JoinGenerator
from Engine.Generators.LayoutGenerator import LayoutGenerator
from Engine.Generators.SplitGenerator import SplitGenerator
from Engine.TextFormatter import TextFormatter
from Engine.utils.utils import load_obj, print_error
def _best_correction(query, words, text_formatter, query_classifier,
                     language_model, layout_generator, grammar_generator,
                     join_generator, split_generator):
    """Return the UTF-8 encoded best correction for ``query``.

    This is the candidate pipeline that the script used to carry inside a
    bare string literal in its stdin loop (i.e. switched off).  The script
    below does NOT call it; it lives here as real code so that it can be
    linted and imported instead of being an inert string.

    NOTE(review): the block nesting was reconstructed from a copy of the
    file whose indentation had been lost -- TODO confirm against the
    working version before re-enabling.

    Steps, as written in the disabled code:
      * if the classifier accepts ``query``/``words`` as is, return the
        query unchanged;
      * otherwise build layout, grammar, join and split candidates, keep
        those the classifier accepts, and score each kept candidate with
        ``language_model.get_prob``;
      * return the first candidate with the highest score, or the query
        itself when no candidate was accepted.
    """
    if query_classifier.is_correct(query, words):
        return query.encode("utf-8")

    # Parallel lists, exactly as in the disabled code: corrections[k] was
    # scored probs[k].
    corrections = []
    probs = []

    def consider(text, tokens):
        # Keep a candidate only if the classifier accepts it.
        if query_classifier.is_correct(text, tokens):
            corrections.append(text.encode("utf-8"))
            probs.append(language_model.get_prob(tokens))

    # =============================== Layout
    layout_words = layout_generator.generate_correction(words)
    consider(text_formatter.format_text(layout_words), layout_words)

    # =============================== Grammar
    for grammar_words in grammar_generator.generate_correction(words):
        consider(text_formatter.format_text(grammar_words), grammar_words)

    # =============================== Join
    for joined in join_generator.generate_joins(words):
        consider(u" ".join(joined), joined)

    # =============================== Split
    for split in split_generator.generate_splits(words):
        consider(u" ".join(split), split)

    if not corrections:
        return query.encode("utf-8")
    # list.index returns the first position of the maximum, so ties go to
    # the earliest generated candidate.
    return corrections[probs.index(max(probs))]


if __name__ == "__main__":
    # Models and generators are still built exactly as before, although in
    # this revision only the (disabled) correction pipeline would use them.
    lm = load_obj("LanguageModel")
    em = ErrorModel(load_obj("Trie"))
    cl = load_obj("classifier")
    qc = QueryClassifier(cl, lm)
    layoutGenerator = LayoutGenerator()
    splitGenerator = SplitGenerator()
    joinGenerator = JoinGenerator()
    grammarGenerator = GrammarGenerator(em, lm)

    for s in sys.stdin:
        textFormatter = TextFormatter(s)
        # get_query_list() is still called as in the original; whether
        # ``text`` depends on that call is not visible from this file.
        words = textFormatter.get_query_list()
        query = textFormatter.text
        # Correction is switched off in this revision: echo the formatted
        # query to stdout and the raw input line through print_error.
        # To switch it back on, print the result of _best_correction(query,
        # words, textFormatter, qc, lm, layoutGenerator, grammarGenerator,
        # joinGenerator, splitGenerator) instead.
        # A parenthesised single-argument print behaves the same as the
        # Python 2 print statement this file was written with.
        print(query.encode("utf-8"))
        print_error(s)