Skip to content

Commit d855898

Browse files
Mess with wavelet-matrix
1 parent 1f51623 commit d855898

File tree

6 files changed

+243
-2
lines changed

6 files changed

+243
-2
lines changed

Cargo.lock

+162
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/divsuftest/.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
/wavelet-matrix-rs

crates/divsuftest/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -13,3 +13,4 @@ cdivsufsort = { path = "../cdivsufsort" }
1313
suffix_array = "0.4.0"
1414
better-panic = "0.2.0"
1515
size_format = "1.0.2"
16+
wavelet-matrix = { path = "./wavelet-matrix-rs" }

crates/divsuftest/crosscheck/c

+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
:: sssort(A)
2+
7 1 2 3 4 6
3+
:: sssort(B)
4+
4 6 -2 -3 -4 7
5+
pascal limit=2 first=3 last=7
6+
insertionsort last-first=4
7+
pascal limit=-3 first=3 last=7
8+
0<=*first
9+
first<last
10+
++a<last
11+
budget pass
12+
pascal limit=1 first=4 last=6
13+
insertionsort last-first=2
14+
pascal limit=-3 first=4 last=6
15+
0<=*first
16+
times pop

crates/divsuftest/crosscheck/rust

+15
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
:: sssort(A)
2+
7 1 2 3 4 6
3+
:: sssort(B)
4+
4 6 -2 -4 0 7
5+
pascal limit=1 first=3 last=6
6+
insertionsort last-first=3
7+
pascal limit=-3 first=3 last=6
8+
first<last
9+
++a<last
10+
budget pass
11+
pascal limit=1 first=3 last=5
12+
insertionsort last-first=2
13+
pascal limit=-3 first=3 last=5
14+
0<=*first
15+
times pop

crates/divsuftest/src/main.rs

+48-2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,12 @@ use std::time::Instant;
33

44
fn main() {
55
better_panic::install();
6-
std::fs::create_dir_all("crosscheck").unwrap();
6+
7+
#[cfg(feature = "crosscheck")]
8+
{
9+
println!("Cross-checking enabled");
10+
std::fs::create_dir_all("crosscheck").unwrap();
11+
}
712

813
let first_arg = std::env::args().nth(1).unwrap_or_else(|| {
914
std::path::PathBuf::from("testdata")
@@ -23,6 +28,46 @@ fn main() {
2328
SizeFormatterBinary::new(maxlen as u64)
2429
);
2530

31+
println!("{:>20} Running", "wavelet");
32+
let wavelet_duration = {
33+
let (before, eighties, after) = unsafe { input.align_to::<u64>() };
34+
println!(
35+
"original {:10} split {:10} {:10} {:10}",
36+
input.len(),
37+
before.len(),
38+
eighties.len(),
39+
after.len()
40+
);
41+
42+
let before_wavelet = Instant::now();
43+
let wm = wavelet_matrix::WaveletMatrix::new(eighties);
44+
let res = before_wavelet.elapsed();
45+
46+
{
47+
let needle = "call (netbsd-amd64-cgo), const ENOSYS = 78
48+
pkg syscall (netbsd-amd64-cgo), const ENOTBLK = 15
49+
pkg syscall (netbs";
50+
let needle = needle;
51+
let needle_bytes = needle.as_bytes();
52+
let (_, needle_eighties, _) = unsafe { needle_bytes.align_to::<u64>() };
53+
54+
let mut range = 0..eighties.len();
55+
let mut lastoffset = 0;
56+
for &c in needle_eighties {
57+
let offset = wm.search_prefix(range.clone(), c, 0).next().unwrap();
58+
range = offset..eighties.len();
59+
println!(
60+
"offset = {:x} ({:x}) text = {:?}",
61+
offset * 8,
62+
offset - lastoffset,
63+
std::str::from_utf8(&input[offset * 8..(offset + 1) * 8])
64+
);
65+
lastoffset = offset;
66+
}
67+
}
68+
res
69+
};
70+
2671
println!("{:>20} Running", "c");
2772

2873
let before_c = Instant::now();
@@ -66,10 +111,11 @@ fn main() {
66111
before_huc.elapsed()
67112
};
68113

114+
let s0 = format!("wavelet {:?}", wavelet_duration);
69115
let s1 = format!("c {:?}", c_duration);
70116
let s2 = format!("rust {:?}", rust_duration);
71117
let s3 = format!("rust-ref {:?}", huc_duration);
72-
println!("{:30} {:30} {:30}", s1, s2, s3);
118+
println!("{:20} {:20} {:20} {:20}", s0, s1, s2, s3);
73119
}
74120

75121
fn check_order<SA: Fn(usize) -> i32>(sa: SA, input: &[u8]) {

0 commit comments

Comments
 (0)