-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSuffixArray.scala
114 lines (105 loc) · 4.18 KB
/
SuffixArray.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package competitivescala.strings
object SuffixArray {
// Complexity: O(n log n)
// Requires a injection between T and Ints
def sortSuffixes[T](seq: IndexedSeq[T])(implicit toInt: T => Int): IndexedSeq[Int] = {
val n = seq.size
val indices = seq.indices
def iteratePow(k: Int, array: IndexedSeq[Int], ranks: IndexedSeq[Int]): IndexedSeq[Int] = {
if(k < n) {
val hk = k / 2
val newRanks = indices.foldLeft(ranks)((currentRanks, i) =>
currentRanks.updated(array(i),
if(i > 0 &&
ranks(array(i - 1)) == ranks(array(i)) &&
array(i - 1) + k < n &&
ranks(array(i - 1) + hk) == ranks(array(i) + hk))
currentRanks(array(i - 1))
else
i
)
)
val (newArray, _) = indices.foldLeft((array, indices.toVector)) { case ((currentArray, currentCount), i) =>
val s = array(i) - k
if(s >= 0) {
val r = newRanks(s)
(currentArray.updated(currentCount(r), s), currentCount.updated(r, currentCount(r) + 1))
} else {
(currentArray, currentCount)
}
}
iteratePow(k * 2, newArray, newRanks)
} else {
array
}
}
iteratePow(1, seq.indices.reverse.sortBy(seq.andThen(toInt)), seq.map(toInt))
}
// Complexity: O(n log n)
def burrowsWheelerTransform[T](seq: IndexedSeq[T])(implicit toInt: T => Int): (Int, IndexedSeq[T]) = {
val sorted = sortSuffixes(seq)
val n = seq.size
(sorted.indexOf(0), sorted.map(i => seq((i + n - 1) % n)))
}
// Complexity: O(n log n)
def burrowsWheelerInverseTransform[T](index: Int, seq: IndexedSeq[T])(implicit toInt: T => Int): IndexedSeq[T] = {
if(seq.nonEmpty) {
def withRank(seq: Seq[T]): Seq[(T, Int)] =
seq.foldLeft((Seq.empty[(T, Int)], Map.empty[T, Int].withDefault(_ => 0))) { case ((acc, map), e) =>
((e, map(e)) +: acc, map + (e -> (map(e) + 1)))
}._1.reverse
val last = withRank(seq)
val mapFirstLast = withRank(seq.sortBy(toInt)).zip(last).toMap
seq.foldLeft((last(index), Seq.empty[T])) { case ((symbol, acc), _) => (mapFirstLast(symbol), symbol._1 +: acc) }._2.toIndexedSeq
} else {
IndexedSeq.empty
}
}
// Complexity: O(n log n)
def sortRotations[T](seq: IndexedSeq[T])(implicit toInt: T => Int): IndexedSeq[Int] = {
val n = seq.size
val indices = seq.indices
def iteratePow(k: Int, array: IndexedSeq[Int], ranks: IndexedSeq[Int]): IndexedSeq[Int] = {
if(k < n) {
val hk = k / 2
val newRanks = indices.foldLeft(ranks)((currentRanks, i) =>
currentRanks.updated(array(i),
if(i > 0 &&
ranks(array(i - 1)) == ranks(array(i)) &&
ranks((array(i - 1) + hk) % n) == ranks((array(i) + hk) % n))
currentRanks(array(i - 1))
else
i
)
)
val (newArray, _) = indices.foldLeft((array, indices.toVector)) { case ((currentArray, currentCount), i) =>
val s = (array(i) - k + n) % n
val r = newRanks(s)
(currentArray.updated(currentCount(r), s), currentCount.updated(r, currentCount(r) + 1))
}
iteratePow(k * 2, newArray, newRanks)
} else {
array
}
}
iteratePow(1, seq.indices.sortBy(seq.andThen(toInt)), seq.map(toInt))
}
def longestCommonPrefixArray[T](seq: IndexedSeq[T])(implicit toInt: T => Int): IndexedSeq[Int] =
longestCommonPrefixArray(seq, sortSuffixes(seq))
// Complexity: O(n log n) for efficiency in this implementation; O(n) theoretically
def longestCommonPrefixArray[T](seq: IndexedSeq[T], suffixArray: IndexedSeq[Int]): IndexedSeq[Int] = {
val n = suffixArray.size
val rank = suffixArray.zipWithIndex.sortBy(_._1).map(_._2)
seq.indices.foldLeft((IndexedSeq.fill(n)(0), 0)) { case ((lcp, k), i) =>
if(rank(i) == n - 1) {
(lcp, k)
} else {
val r = rank(i)
val j = suffixArray(r + 1)
def forward(k: Int): Int = if(i + k < n && j + k < n && seq(i + k) == seq(j + k)) forward(k + 1) else k
val k1 = forward(k)
(lcp.updated(r, k1), if(k1 > 0) k1 - 1 else k1)
}
}._1
}
}