/**
 * @file
 * @brief The [Knuth-Morris-Pratt
 * Algorithm](https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm) for
 * finding a pattern within a piece of text with complexity O(n + m)
 * @details
 * 1. Preprocess the pattern to identify any suffixes that are identical to
 * prefixes. This tells us where to continue from if we get a mismatch between a
 * character in our pattern and the text.
 * 2. Step through the text one character at a time and compare it to a
 * character in the pattern, updating our location within the pattern if
 * necessary.
 * @author [Yancey](https://github.com/Yancey2023)
 */

#include <cassert>   /// for assert
#include <cstddef>   /// for std::size_t
#include <iostream>  /// for IO operations
#include <string>    /// for std::string
#include <vector>    /// for std::vector

/**
 * @namespace string_search
 * @brief String search algorithms
 */
namespace string_search {
/**
 * @brief Generate the partial match table aka failure function for a pattern to
 * search.
 * @details
 * The returned table has `pattern.size() + 1` entries. Entry `0` is the
 * sentinel `std::string::npos` ("no fallback position"); entry `i + 1` is the
 * length of the longest proper prefix of `pattern[0..i]` that is also a suffix
 * of it, i.e. the pattern index to resume comparing from after a mismatch.
 * @param pattern text for which to create the partial match table
 * @returns the partial match table as a vector array
 */
std::vector<std::size_t> getFailureArray(const std::string &pattern) {
    const std::size_t pattern_length = pattern.size();
    std::vector<std::size_t> failure(pattern_length + 1);
    failure[0] = std::string::npos;
    std::size_t j = std::string::npos;
    // the index is unsigned like the length, so the comparison never mixes
    // signed and unsigned values
    for (std::size_t i = 0; i < pattern_length; i++) {
        // fall back through shorter borders until one can be extended by
        // pattern[i] or no border is left (sentinel reached)
        while (j != std::string::npos && pattern[j] != pattern[i]) {
            j = failure[j];
        }
        // unsigned arithmetic is modular, so incrementing the npos sentinel
        // (the maximum value) wraps to 0, which is the intended restart index
        failure[i + 1] = ++j;
    }
    return failure;
}

/**
 * @brief KMP algorithm to find a pattern in a text
 * @param pattern string pattern to search
 * @param text text in which to search
 * @returns the starting index of the first occurrence of the pattern if found
 * (`0` for an empty pattern)
 * @returns `std::string::npos` if not found
 */
std::size_t kmp(const std::string &pattern, const std::string &text) {
    if (pattern.empty()) {
        return 0;
    }
    const std::size_t text_length = text.size();
    const std::size_t pattern_length = pattern.size();
    if (pattern_length > text_length) {
        // a pattern longer than the text can never match; skip building the
        // failure table
        return std::string::npos;
    }
    const std::vector<std::size_t> failure = getFailureArray(pattern);
    std::size_t k = 0;  // number of pattern characters currently matched
    for (std::size_t j = 0; j < text_length; j++) {
        // on a mismatch, fall back in the pattern instead of rewinding the text
        while (k != std::string::npos && pattern[k] != text[j]) {
            k = failure[k];
        }
        // if k hit the npos sentinel, the increment wraps it back to 0
        if (++k == pattern_length) {
            // the match ends at text index j; add before subtracting so the
            // intermediate value never drops below zero
            return j + 1 - pattern_length;
        }
    }
    return std::string::npos;
}
}  // namespace string_search

using string_search::kmp;

75 |
| -/** Main function */ |
76 |
| -int main() { |
77 |
| - std::string text = "alskfjaldsabc1abc1abc12k23adsfabcabc"; |
78 |
| - std::string pattern = "abc1abc12l"; |
79 |
| - |
80 |
| - if (kmp(pattern, text) == true) { |
81 |
| - std::cout << "Found" << std::endl; |
82 |
| - } else { |
83 |
| - std::cout << "Not Found" << std::endl; |
84 |
| - } |
| 75 | +/** |
| 76 | + * @brief self-test implementations |
| 77 | + * @returns void |
| 78 | + */ |
| 79 | +static void tests() { |
| 80 | + assert(kmp("abc1abc12l", "alskfjaldsabc1abc1abc12k2") == std::string::npos); |
| 81 | + assert(kmp("bca", "abcabc") == 1); |
| 82 | + assert(kmp("World", "helloWorld") == 5); |
| 83 | + assert(kmp("c++", "his_is_c++") == 7); |
| 84 | + assert(kmp("happy", "happy_coding") == 0); |
| 85 | + assert(kmp("", "pattern is empty") == 0); |
85 | 86 |
|
86 |
| - text = "abcabc"; |
87 |
| - pattern = "bca"; |
88 |
| - if (kmp(pattern, text) == true) { |
89 |
| - std::cout << "Found" << std::endl; |
90 |
| - } else { |
91 |
| - std::cout << "Not Found" << std::endl; |
92 |
| - } |
| 87 | + // this lets the user know that the tests have passed |
| 88 | + std::cout << "All KMP algorithm tests have successfully passed!\n"; |
| 89 | +} |
93 | 90 |
|
| 91 | +/* |
| 92 | + * @brief Main function |
| 93 | + * @returns 0 on exit |
| 94 | + */ |
| 95 | +int main() { |
| 96 | + tests(); |
94 | 97 | return 0;
|
95 | 98 | }
|
0 commit comments