Skip to content

Commit 2e71476

Browse files
Add Aho-Corasick in Python (jainaman224#2962)
1 parent 4f2b73b commit 2e71476

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed

Aho-Corasick/Aho-Corasick.py

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# AhoNode class
2+
class AhoNode:
    """One state of the Aho-Corasick automaton (a node of the keyword trie)."""

    def __init__(self):
        # Outgoing trie edges: symbol -> child AhoNode.
        self.goto = {}
        # Patterns to report whenever this state is reached.
        self.out = []
        # Failure link to another AhoNode; None until one is assigned.
        self.fail = None
7+
8+
9+
# creating aho forest
10+
def aho_create_forest(patterns):
    """Build the keyword trie for ``patterns`` and return its root node.

    Every pattern is inserted symbol by symbol; the node reached by the last
    symbol records the pattern in its ``out`` list. Failure links are not
    set here.

    Args:
        patterns: Iterable of patterns (for example strings). Empty patterns
            are skipped.

    Returns:
        The root ``AhoNode`` of the trie.
    """
    root = AhoNode()

    for path in patterns:
        if not path:
            # An empty pattern would be recorded on the root itself, which is
            # never reported directly; skip it rather than store it there.
            continue
        node = root
        for symbol in path:
            child = node.goto.get(symbol)
            if child is None:
                # Allocate a node only when the edge is really missing.
                child = AhoNode()
                node.goto[symbol] = child
            node = child
        node.out.append(path)
    return root
19+
20+
21+
# Creating aho automata
22+
def aho_create_statemachine(patterns):
    """Build the Aho-Corasick automaton for ``patterns`` and return its root.

    The trie is created with ``aho_create_forest``; failure links are then
    assigned breadth-first, and each state's ``out`` list is extended with the
    ``out`` list of its failure target.

    Args:
        patterns: Iterable of patterns, passed to ``aho_create_forest``.

    Returns:
        The root ``AhoNode``; its own ``fail`` link stays ``None``.
    """
    # Imported locally because the file has no top-level import section.
    from collections import deque

    root = aho_create_forest(patterns)

    # Children of the root always fall back to the root.
    queue = deque()
    for node in root.goto.values():
        queue.append(node)
        node.fail = root

    # FIFO order processes parents before their children, so the parent's
    # failure link is already set when a child is handled.
    while queue:
        rnode = queue.popleft()

        for key, unode in rnode.goto.items():
            queue.append(unode)
            # Follow the parent's failure chain until a state with an edge
            # on `key` is found, or the chain runs out past the root.
            fnode = rnode.fail
            while fnode is not None and key not in fnode.goto:
                fnode = fnode.fail
            unode.fail = fnode.goto[key] if fnode is not None else root
            # Reaching `unode` also matches everything its failure target
            # matches.
            unode.out.extend(unode.fail.out)

    return root
41+
42+
43+
def aho_find_all(s, root, callback):
    """Scan ``s`` with the automaton rooted at ``root`` and report matches.

    Args:
        s: Text to search; any iterable of symbols.
        root: Root node of the automaton. Nodes are read through their
            ``goto``, ``fail`` and ``out`` attributes.
        callback: Called as ``callback(start, pattern)`` for every entry in
            the ``out`` list of each state reached, where ``start`` is the
            index in ``s`` at which ``pattern`` begins.
    """
    node = root

    for i, symbol in enumerate(s):
        # Follow failure links until some state has an edge for this symbol.
        while node is not None and symbol not in node.goto:
            node = node.fail
        if node is None:
            # The failure chain ran out: restart from the root on the next
            # symbol.
            node = root
            continue
        node = node.goto[symbol]
        for pattern in node.out:
            # The match ends at index i, so it starts len(pattern) - 1 earlier.
            callback(i - len(pattern) + 1, pattern)
55+
56+
57+
# Printing the position of pattern found
58+
def print_pattern(pos, patterns):
    """Print one match: its position and the pattern found there.

    Args:
        pos: Position to print.
        patterns: The matched pattern to print (a single pattern, despite the
            plural name).
    """
    print("At pos %s found pattern: %s" % (pos, patterns))
60+
61+
62+
if __name__ == "__main__":
    # Read the number of patterns, then each pattern, then the text to search.
    n = int(input("Enter no. of elements :- "))
    patterns = [input("Enter element :- ") for _ in range(n)]
    s = input("Enter text :- ")

    # Build the automaton once and print every match found in the text.
    root = aho_create_statemachine(patterns)
    aho_find_all(s, root, print_pattern)
74+
75+
76+
"""
77+
INPUT:
78+
n=6
79+
patterns = ['a', 'ab', 'abc', 'bc', 'c', 'cba']
80+
s = "abcba"
81+
82+
OUTPUT:
83+
At pos 0 found pattern: a
84+
At pos 0 found pattern: ab
85+
At pos 0 found pattern: abc
86+
At pos 1 found pattern: bc
87+
At pos 2 found pattern: c
88+
At pos 2 found pattern: cba
At pos 4 found pattern: a
89+
"""

0 commit comments

Comments
 (0)