|
/* Aho-Corasick algorithm: a dictionary-matching algorithm that locates all
occurrences of a finite set of strings (the "dictionary") within an input text.
It matches all strings simultaneously, in a single pass over the text. */
| 4 | + |
#include <bits/stdc++.h>

using namespace std;
| 7 | + |
// Upper bound on the number of states (trie nodes) in the matching machine.
// The machine needs at most 1 (the root) + the sum of the lengths of all keywords.
const int MAXSTATE = 6 * 50 + 10;

// Number of characters in the alphabet (second dimension of the goto table).
const int MAXCHAR = 26;

// Output function: out[s] is a bitmask in which bit i is set when keyword i
// ends at state s. Because the mask is a plain int, only keyword indices
// 0..30 can be represented safely.
int out[MAXSTATE];

// Failure function: fail[s] is the state to fall back to when state s has no
// goto transition for the current character.
int fail[MAXSTATE];

// Goto function: g[s][c] is the state reached from state s on character
// index c, or -1 if there is no such transition.
int g[MAXSTATE][MAXCHAR];
| 20 | + |
| 21 | +int buildMachine(const vector<string> &words, char lowestChar = 'a', char highestChar = 'z') |
| 22 | +{ |
| 23 | + memset(out, 0, sizeof out); //intializing out with 0s |
| 24 | + memset(fail, -1, sizeof fail); //intializing fail with -1s |
| 25 | + memset(g, -1, sizeof g); //intializing g with -1s |
| 26 | + int states = 1; // Initially, we just have the 0 state |
| 27 | + |
| 28 | + for (int i = 0; i < words.size(); i++) |
| 29 | + { |
| 30 | + const string &keyword = words[i]; |
| 31 | + int currentState = 0; |
| 32 | + for (int j = 0; j < keyword.size(); j++) |
| 33 | + { |
| 34 | + int c = keyword[j] - lowestChar; |
| 35 | + if (g[currentState][c] == -1) |
| 36 | + { |
| 37 | + // Allocate a new node |
| 38 | + g[currentState][c] = states++; |
| 39 | + |
| 40 | + } |
| 41 | + currentState = g[currentState][c]; |
| 42 | + } |
| 43 | + out[currentState] |= (1 << i); |
| 44 | + // There's a match of keywords[i] at node currentState. |
| 45 | + } |
| 46 | + // State 0 should have an outgoing edge for all characters. |
| 47 | + for (int c = 0; c < MAXCHAR; c++) |
| 48 | + { |
| 49 | + if (g[0][c] == -1) |
| 50 | + { |
| 51 | + g[0][c] = 0; |
| 52 | + } |
| 53 | + } |
| 54 | + //building the failure function |
| 55 | + queue<int> q; |
| 56 | + for (int c = 0; c <= highestChar - lowestChar; c++) |
| 57 | + { |
| 58 | + // Iterate over every possible input. All nodes s of depth 1 have fail[s] = 0 |
| 59 | + if (g[0][c] != -1 && g[0][c] != 0) |
| 60 | + { |
| 61 | + fail[g[0][c]] = 0; |
| 62 | + q.push(g[0][c]); |
| 63 | + } |
| 64 | + } |
| 65 | + |
| 66 | + while (q.size()) |
| 67 | + { |
| 68 | + int state = q.front(); |
| 69 | + q.pop(); |
| 70 | + for (int c = 0; c <= highestChar - lowestChar; c++) |
| 71 | + { |
| 72 | + if (g[state][c] != -1) |
| 73 | + { |
| 74 | + int failure = fail[state]; |
| 75 | + int failure = fail[state]; |
| 76 | + while (g[failure][c] == -1) |
| 77 | + { |
| 78 | + failure = fail[failure]; |
| 79 | + } |
| 80 | + failure = g[failure][c]; |
| 81 | + fail[g[state][c]] = failure; |
| 82 | + out[g[state][c]] |= out[failure]; // Merge out values |
| 83 | + q.push(g[state][c]); |
| 84 | + } |
| 85 | + } |
| 86 | + } |
| 87 | + return states; |
| 88 | +} |
| 89 | + |
| 90 | +int findNextState(int currentState, char nextInput, char lowestChar = 'a') |
| 91 | +{ |
| 92 | + int answer = currentState; |
| 93 | + int c = nextInput - lowestChar; |
| 94 | + while (g[answer][c] == -1) |
| 95 | + { |
| 96 | + answer = fail[answer]; |
| 97 | + } |
| 98 | + return g[answer][c]; |
| 99 | +} |
| 100 | + |
| 101 | +int main() |
| 102 | +{ |
| 103 | + vector<string> keywords; |
| 104 | + cout<<"Enter the number of keywords you want to enter"; |
| 105 | + int n; |
| 106 | + for (int i = 0; i < n; i++) |
| 107 | + { |
| 108 | + string temp; |
| 109 | + cin >> temp; |
| 110 | + keywords.push_back(temp); |
| 111 | + } |
| 112 | + cout<<"Enter text"; |
| 113 | + string text; |
| 114 | + cin>>text; |
| 115 | + buildMachine(keywords, 'a', 'z'); |
| 116 | + int currentState = 0; |
| 117 | + for (int i = 0; i < text.size(); i++) |
| 118 | + { |
| 119 | + currentState = findNextState(currentState, text[i], 'a'); |
| 120 | + if (out[currentState] == 0) |
| 121 | + { |
| 122 | + continue; // Nothing new, moving on to the next character. |
| 123 | + } |
| 124 | + for (int j = 0; j < keywords.size(); j++) |
| 125 | + { |
| 126 | + if (out[currentState] & (1 << j)) |
| 127 | + { |
| 128 | + // Matched keywords[j] |
| 129 | + cout << "Keyword " << keywords[j] << " appears from " << i - keywords[j].size() + 1 << " to " << i << endl; |
| 130 | + } |
| 131 | + } |
| 132 | + } |
| 133 | + return 0; |
| 134 | +} |
| 135 | + |
/*
Input

keywords = {"he", "she", "hers", "his"}
text = "ahishers"

Output

Keyword his appears from 1 to 3
Keyword he appears from 4 to 5
Keyword she appears from 3 to 5
Keyword hers appears from 4 to 7

*/