Skip to content

Commit 79f3354

Browse files
committed
test: add unit test for BloomFilter
Signed-off-by: Kould <[email protected]>
1 parent 9d37617 commit 79f3354

File tree

1 file changed

+70
-3
lines changed

1 file changed

+70
-3
lines changed

src/query/storages/common/index/src/filters/xor8/bloom_filter.rs

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,18 @@ impl BloomFilter {
205205
assert!(filter_size > 0, "filter_size must be > 0");
206206
item_count = max(item_count, 1);
207207

208-
let ln2 = std::f64::consts::LN_2;
209-
let k = ((filter_size as f64 / item_count as f64) * ln2).ceil() as usize;
210-
let k = k.max(1);
208+
let k = Self::optimal_k(filter_size, item_count);
211209

212210
Self::with_params(filter_size, k, seed)
213211
}
214212

213+
/// Returns the number of hash functions to use for a filter of size
/// `filter_size` that is expected to hold `item_count` items.
///
/// The result is `ceil((filter_size / item_count) * ln 2)`, and is never
/// less than 1. An `item_count` of 0 is treated as 1.
#[inline]
fn optimal_k(filter_size: usize, item_count: usize) -> usize {
    // Clamp here as well as in the caller: dividing by zero in f64 yields
    // infinity, which the `as usize` cast below would saturate to usize::MAX.
    let item_count = item_count.max(1);
    let ratio = filter_size as f64 / item_count as f64;
    let k = (ratio * std::f64::consts::LN_2).ceil() as usize;
    // A ratio of zero rounds to zero hashes; at least one is always required.
    k.max(1)
}
219+
215220
pub fn with_params(size: usize, hashes: usize, seed: u64) -> Self {
216221
assert_ne!(size, 0);
217222
assert_ne!(hashes, 0);
@@ -286,3 +291,65 @@ impl From<BloomBuildingError> for ErrorCode {
286291
ErrorCode::Internal(e.to_string())
287292
}
288293
}
294+
295+
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built single-hash filter must report the one value added to it.
    #[test]
    fn test_block_insert_and_check() {
        for i in 0..1_000_000 {
            // A new filter per value, so each lookup depends only on that
            // value's own insertion.
            let mut filter = BloomFilter::with_params(10, 1, 0);
            filter.add(i);
            assert!(filter.find(i));
        }
    }

    /// Every value added to a filter built by `with_item_count` must be found again.
    #[test]
    fn test_sbbf_insert_and_check() {
        let item_count = 1_000_000;
        let mut filter = BloomFilter::with_item_count(10 * 1024, item_count, 0);
        for i in 0..item_count as u64 {
            filter.add(i);
            assert!(filter.find(i));
        }
    }

    /// A filter serialized with `to_bytes` and restored with `from_bytes` must
    /// hold the same contents and still find every inserted value.
    #[test]
    fn test_encode_and_decode() {
        let hashes: Vec<_> = (0..500000).collect();

        let mut filter = BloomFilter::with_params(10 * 1024, 1, 0);
        for hash in hashes.iter() {
            filter.add(*hash);
        }
        assert!(hashes.iter().all(|hash| filter.find(*hash)));

        let buf = filter.to_bytes().unwrap();
        let (decode_filter, _) = BloomFilter::from_bytes(&buf).unwrap();

        // `zip` stops at the shorter side, so compare the element counts first;
        // otherwise a truncated decoded filter would pass the loop below.
        assert_eq!(
            filter.filter.iter().count(),
            decode_filter.filter.iter().count()
        );
        filter
            .filter
            .iter()
            .zip(decode_filter.filter.iter())
            .for_each(|(a, b)| {
                assert_eq!(a, b);
            });
        assert!(hashes.iter().all(|hash| decode_filter.find(*hash)));
    }

    /// Pins `optimal_k` to `ceil((filter_size / item_count) * ln 2)`, floored at 1.
    #[test]
    fn test_optimal_k() {
        assert_eq!(BloomFilter::optimal_k(1000, 100), 7); // (1000/100)*ln(2) ≈ 6.93 → ceil → 7
        assert_eq!(BloomFilter::optimal_k(1024, 128), 6); // (1024/128)*ln(2) ≈ 5.545 → ceil → 6
        assert_eq!(BloomFilter::optimal_k(100, 1000), 1); // (100/1000)*ln(2) ≈ 0.069 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(100, 100), 1); // (100/100)*ln(2) ≈ 0.693 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(1, 1), 1); // (1/1)*ln(2) ≈ 0.693 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(1, 1000), 1); // (1/1000)*ln(2) ≈ 0.0007 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(100, 50), 2); // (100/50)*ln(2) ≈ 1.386 → ceil → 2
        assert_eq!(BloomFilter::optimal_k(101, 50), 2); // (101/50)*ln(2) ≈ 1.400 → ceil → 2
        assert_eq!(BloomFilter::optimal_k(1_000_000, 10_000), 70); // (1e6/1e4)*ln(2) ≈ 69.31 → ceil → 70
    }
}

0 commit comments

Comments
 (0)