Skip to content

Commit eb56f0e

Browse files
committed
Merge branch 'KubaSz4-lcp_array'
2 parents 9ac3354 + 2f826ce commit eb56f0e

File tree

8 files changed

+601
-2
lines changed

8 files changed

+601
-2
lines changed

Diff for: README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ This is a collection of algorithms and data structures which I've implement over
4747
* [Implicit Key Treap](src/com/jwetherell/algorithms/data_structures/ImplicitKeyTreap.java)
4848
* [KD Tree (k-dimensional tree or k-d tree)](src/com/jwetherell/algorithms/data_structures/KDTree.java)
4949
* [List [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/List.java)
50+
* [LCP Array (Longest Common Prefix) [backed by a Suffix Array]](src/com/jwetherell/algorithms/data_structures/LCPArray.java)
5051
* [Matrix](src/com/jwetherell/algorithms/data_structures/Matrix.java)
5152
* [Patricia Trie](src/com/jwetherell/algorithms/data_structures/PatriciaTrie.java)
5253
* [Quad-Tree (Point-Region or MX-CIF)](src/com/jwetherell/algorithms/data_structures/QuadTree.java)
@@ -57,6 +58,7 @@ This is a collection of algorithms and data structures which I've implement over
5758
* [Skip List](src/com/jwetherell/algorithms/data_structures/SkipList.java)
5859
* [Splay Tree](src/com/jwetherell/algorithms/data_structures/SplayTree.java)
5960
* [Stack [backed by an array or a linked list]](src/com/jwetherell/algorithms/data_structures/Stack.java)
61+
* [Suffix Array](src/com/jwetherell/algorithms/data_structures/SuffixArray.java)
6062
* [Suffix Tree (Ukkonen's algorithm)](src/com/jwetherell/algorithms/data_structures/SuffixTree.java)
6163
* [Suffix Trie [backed by a Trie]](src/com/jwetherell/algorithms/data_structures/SuffixTrie.java)
6264
* [Treap](src/com/jwetherell/algorithms/data_structures/Treap.java)
@@ -150,7 +152,9 @@ This is a collection of algorithms and data structures which I've implement over
150152
* Graph Traversal
151153
- [Depth First Traversal](src/com/jwetherell/algorithms/graph/DepthFirstTraversal.java)
152154
- [Breadth First Traversal](src/com/jwetherell/algorithms/graph/BreadthFirstTraversal.java)
153-
* [Edmonds Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java)
155+
* [Edmonds Karp](src/com/jwetherell/algorithms/graph/EdmondsKarp.java)
156+
* Matching
157+
- [Turbo Matching](src/com/jwetherell/algorithms/graph/TurboMatching.java)
154158

155159
## Search
156160
* Get index of value in array

Diff for: src/com/jwetherell/algorithms/data_structures/KdTree.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
* useful data structure for several applications, such as searches involving a
2222
* multidimensional search key (e.g. range searches and nearest neighbor
2323
* searches). k-d trees are a special case of binary space partitioning trees.
24-
*
24+
* <br>
2525
* @author Justin Wetherell <[email protected]>
2626
* @see <a href="http://en.wikipedia.org/wiki/K-d_tree">K-d_tree (Wikipedia)</a>
2727
*/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
package com.jwetherell.algorithms.data_structures;
2+
3+
import java.util.ArrayList;
4+
5+
/**
6+
* In computer science, the longest common prefix array (LCP array) is an auxiliary
7+
* data structure to the suffix array. It stores the lengths of the longest common
8+
* prefixes (LCPs) between all pairs of consecutive suffixes in a sorted suffix array.
9+
* <p>
10+
* https://en.wikipedia.org/wiki/LCP_array
11+
* <br>
12+
* @author Jakub Szarawarski <[email protected]>
13+
* @author Justin Wetherell <[email protected]>
14+
*/
15+
public class LCPArray {
16+
17+
private static final char DEFAULT_END_SEQ_CHAR = '$';
18+
19+
private char END_SEQ_CHAR;
20+
private SuffixArray suffixArrayBuilder;
21+
private ArrayList<Integer> LCP;
22+
23+
public LCPArray(CharSequence sequence){
24+
this(sequence, DEFAULT_END_SEQ_CHAR);
25+
}
26+
27+
public LCPArray(CharSequence sequence, char endChar) {
28+
END_SEQ_CHAR = endChar;
29+
suffixArrayBuilder = new SuffixArray(sequence, END_SEQ_CHAR);
30+
}
31+
32+
public ArrayList<Integer> getLCPArray() {
33+
if (LCP == null)
34+
LCPAlgorithm();
35+
return LCP;
36+
}
37+
38+
private void LCPAlgorithm() {
39+
final ArrayList<Integer> LCPR = getLCPR();
40+
getLCPfromLCPR(LCPR);
41+
}
42+
43+
private ArrayList<Integer> getLCPR() {
44+
final ArrayList<Integer> KMRArray = suffixArrayBuilder.getKMRarray();
45+
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();
46+
final String string = suffixArrayBuilder.getString();
47+
final int length = KMRArray.size();
48+
final ArrayList<Integer> LCPR = new ArrayList<Integer>(); // helper array, LCP[i] = LCPR[suffixArray[i]]
49+
50+
int startingValue = 0;
51+
for (int i=0; i<length; i++) {
52+
if(KMRArray.get(i).equals(0)) {
53+
LCPR.add(0);
54+
startingValue = 0;
55+
} else {
56+
int LCPRValue = startingValue;
57+
final int predecessor = suffixArray.get(KMRArray.get(i)-1);
58+
while (string.charAt(i+LCPRValue) == string.charAt(predecessor+LCPRValue))
59+
LCPRValue++;
60+
LCPR.add(LCPRValue);
61+
startingValue = LCPRValue-1 > 0 ? LCPRValue-1 : 0;
62+
}
63+
}
64+
65+
return LCPR;
66+
}
67+
68+
private void getLCPfromLCPR(ArrayList<Integer> LCPR) {
69+
final ArrayList<Integer> suffixArray = suffixArrayBuilder.getSuffixArray();
70+
final int length = suffixArray.size();
71+
72+
LCP = new ArrayList<Integer>();
73+
LCP.add(null); //no value for LCP[0]
74+
for (int i=1; i<length; i++)
75+
LCP.add(LCPR.get(suffixArray.get(i)));
76+
}
77+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
package com.jwetherell.algorithms.data_structures;
2+
3+
import java.util.ArrayList;
4+
import java.util.Collections;
5+
import java.util.Comparator;
6+
7+
/**
8+
* In computer science, a suffix array is a sorted array of all suffixes of a string.
9+
* It is a data structure used, among others, in full text indices, data compression
10+
* algorithms and within the field of bibliometrics.
11+
* <p>
12+
* https://en.wikipedia.org/wiki/Suffix_array
13+
* <p>
14+
* NOTE: This implementation returns starting indexes instead of full suffixes
15+
* <br>
16+
* @author Jakub Szarawarski <[email protected]>
17+
* @author Justin Wetherell <[email protected]>
18+
*/
19+
public class SuffixArray {

    private static final char DEFAULT_END_SEQ_CHAR = '$';

    private final char END_SEQ_CHAR;
    private final String string;
    // Both arrays are built together, lazily, by KMRalgorithm().
    private ArrayList<Integer> suffixArray = null;
    private ArrayList<Integer> KMRarray = null;

    /**
     * Creates a suffix array for the sequence using '$' as the end character.
     *
     * @param sequence text to index
     */
    public SuffixArray(CharSequence sequence) {
        this(sequence, DEFAULT_END_SEQ_CHAR);
    }

    /**
     * Creates a suffix array for the sequence.
     *
     * @param sequence text to index
     * @param endChar  end-of-sequence character; appended to the text unless the text
     *                 already contains it
     */
    public SuffixArray(CharSequence sequence, char endChar) {
        END_SEQ_CHAR = endChar;
        string = buildStringWithEndChar(sequence);
    }

    /**
     * @return starting indexes of all suffixes of {@link #getString()}, in sorted suffix order
     */
    public ArrayList<Integer> getSuffixArray() {
        if (suffixArray == null)
            KMRalgorithm();
        return suffixArray;
    }

    /**
     * @return inverted suffix array
     */
    public ArrayList<Integer> getKMRarray() {
        if (KMRarray == null)
            KMRalgorithm();
        return KMRarray;
    }

    /**
     * @return the indexed text, including the end character
     */
    public String getString(){
        return string;
    }

    /**
     * Creates suffix array using KMR algorithm with O(n log^2 n) complexity.
     *
     * For radius r:
     *      KMR[i] == k,
     *      when string[i..i+r-1] is kth r-letter substring of string sorted lexicographically
     * KMR is counted for radius = 1,2,4,8 ...
     * KMR for radius bigger than string length is the inverted suffix array
     */
    private void KMRalgorithm() {
        final int length = string.length();

        // With a single character the doubling loop below never runs, which would leave
        // the suffix array empty and the rank array holding the raw character code.
        // There is exactly one suffix, starting at index 0, with rank 0.
        if (length == 1) {
            suffixArray = new ArrayList<Integer>(1);
            suffixArray.add(0);
            KMRarray = new ArrayList<Integer>(1);
            KMRarray.add(0);
            return;
        }

        ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>();
        ArrayList<Integer> KMR = getBasicKMR(length);

        int radius = 1;
        while (radius < length) {
            KMRinvertedList = getKMRinvertedList(KMR, radius, length);
            KMR = getKMR(KMRinvertedList, length);
            radius *= 2;
        }

        KMRarray = new ArrayList<Integer>(KMR.subList(0, length));
        suffixArray = new ArrayList<Integer>(length);
        for (KMRsWithIndex kmr : KMRinvertedList)
            suffixArray.add(kmr.index);
    }

    /**
     * Creates KMR array for new radius from nearly inverted array.
     * Elements from inverted array need to be grouped by substring they represent.
     *
     * @param KMRinvertedList   indexes are nearly inverted KMR array
     * @param length            string length
     * @return KMR array for new radius
     */
    private ArrayList<Integer> getKMR(ArrayList<KMRsWithIndex> KMRinvertedList, int length) {
        // Twice the string length, padded with -1, so that lookups at i+radius past the
        // end of the string yield a value smaller than every real rank.
        final ArrayList<Integer> KMR = new ArrayList<Integer>(length*2);
        for (int i=0; i<2*length; i++)
            KMR.add(-1);

        int counter = 0;
        for (int i=0; i<length; i++){
            // A new rank starts whenever the sorted neighbour represents a different substring.
            if(i>0 && substringsDiffer(KMRinvertedList, i))
                counter++;
            KMR.set(KMRinvertedList.get(i).index, counter);
        }

        return KMR;
    }

    /**
     * Tells whether entries {@code i-1} and {@code i} of the sorted list represent
     * different substrings, i.e. differ in either half's rank.
     *
     * @param KMRinvertedList sorted list of rank pairs
     * @param i               position to compare with its predecessor; must be at least 1
     * @return {@code true} if the two entries differ in beginKMR or endKMR
     */
    private boolean substringsDiffer(ArrayList<KMRsWithIndex> KMRinvertedList, int i) {
        final KMRsWithIndex previous = KMRinvertedList.get(i-1);
        final KMRsWithIndex current = KMRinvertedList.get(i);
        return !previous.beginKMR.equals(current.beginKMR) ||
                !previous.endKMR.equals(current.endKMR);
    }

    /**
     * helper method to create KMR array for radius = radius from KMR array for radius = radius/2
     *
     * @param KMR       KMR array for radius = radius/2
     * @param radius    new radius
     * @param length    string length
     * @return list of KMRsWithIndex which indexes are nearly inverted KMR array
     */
    private ArrayList<KMRsWithIndex> getKMRinvertedList(ArrayList<Integer> KMR, int radius, int length) {
        final ArrayList<KMRsWithIndex> KMRinvertedList = new ArrayList<KMRsWithIndex>(length);
        for (int i=0; i<length; i++)
            KMRinvertedList.add(new KMRsWithIndex(KMR.get(i), KMR.get(i+radius), i));

        // Order by first-half rank, then second-half rank, then start index.
        Collections.sort(KMRinvertedList,
            new Comparator<KMRsWithIndex>() {
                @Override
                public int compare(KMRsWithIndex A, KMRsWithIndex B) {
                    if (!A.beginKMR.equals(B.beginKMR))
                        return A.beginKMR.compareTo(B.beginKMR);
                    if (!A.endKMR.equals(B.endKMR))
                        return A.endKMR.compareTo(B.endKMR);
                    return A.index.compareTo(B.index);
                }
            }
        );
        return KMRinvertedList;
    }

    /**
     * KMR array for radius=1, instead of initial natural numbers ascii codes are used
     *
     * @param length    length of string
     * @return pseudo KMR array for radius=1
     */
    private ArrayList<Integer> getBasicKMR(int length) {
        final ArrayList<Integer> result = new ArrayList<Integer>(length*2);
        for (int i=0; i<length; i++)
            result.add(Integer.valueOf(string.charAt(i)));
        // Padding used for lookups past the end of the string.
        for (int i=0; i<length; i++)
            result.add(-1);
        return result;
    }

    /**
     * Copies the sequence into a String, appending the end character unless the
     * sequence already contains it.
     *
     * @param sequence input text
     * @return text guaranteed to contain the end character
     */
    private String buildStringWithEndChar(CharSequence sequence) {
        // A fresh builder per call: a shared static builder would be corrupted when
        // instances are constructed from several threads at once.
        final StringBuilder builder = new StringBuilder();
        builder.append(sequence);
        if (builder.indexOf(String.valueOf(END_SEQ_CHAR)) < 0)
            builder.append(END_SEQ_CHAR);
        return builder.toString();
    }

    /** Rank of a substring's first half, rank of its second half, and its start index. */
    private static class KMRsWithIndex{
        final Integer beginKMR;
        final Integer endKMR;
        final Integer index;

        KMRsWithIndex(Integer begin, Integer end, Integer index){
            this.beginKMR = begin;
            this.endKMR = end;
            this.index = index;
        }
    }
}

0 commit comments

Comments
 (0)