@@ -205,13 +205,18 @@ impl BloomFilter {
205
205
assert ! ( filter_size > 0 , "filter_size must be > 0" ) ;
206
206
item_count = max ( item_count, 1 ) ;
207
207
208
- let ln2 = std:: f64:: consts:: LN_2 ;
209
- let k = ( ( filter_size as f64 / item_count as f64 ) * ln2) . ceil ( ) as usize ;
210
- let k = k. max ( 1 ) ;
208
+ let k = Self :: optimal_k ( filter_size, item_count) ;
211
209
212
210
Self :: with_params ( filter_size, k, seed)
213
211
}
214
212
213
/// Computes the number of hash functions to use for a filter of
/// `filter_size` that is expected to hold `item_count` items.
///
/// The result is `ceil((filter_size / item_count) * ln 2)`, never less
/// than 1.
///
/// An `item_count` of 0 is treated as 1. Without that clamp the division
/// yields `+inf` (or NaN when `filter_size` is also 0), and the saturating
/// float-to-integer cast would turn `+inf` into `usize::MAX` hash functions.
#[inline]
fn optimal_k(filter_size: usize, item_count: usize) -> usize {
    // Guard the divisor here so the helper is safe on its own, regardless
    // of whether the caller has already clamped `item_count`.
    let items = item_count.max(1) as f64;
    let size_per_item = filter_size as f64 / items;
    let hashes = (size_per_item * std::f64::consts::LN_2).ceil() as usize;
    // A ratio of zero (filter_size == 0) rounds to 0; at least one hash is required.
    hashes.max(1)
}
219
+
215
220
pub fn with_params ( size : usize , hashes : usize , seed : u64 ) -> Self {
216
221
assert_ne ! ( size, 0 ) ;
217
222
assert_ne ! ( hashes, 0 ) ;
@@ -286,3 +291,65 @@ impl From<BloomBuildingError> for ErrorCode {
286
291
ErrorCode :: Internal ( e. to_string ( ) )
287
292
}
288
293
}
294
+
295
#[cfg(test)]
mod tests {
    use super::*;

    /// A value added to a fresh, tiny single-hash filter must be found again.
    #[test]
    fn test_block_insert_and_check() {
        for i in 0..1_000_000 {
            // A new filter per iteration: each value is checked in isolation.
            let mut filter = BloomFilter::with_params(10, 1, 0);
            filter.add(i);
            assert!(filter.find(i));
        }
    }

    /// Every value added to a filter built via `with_item_count` must be
    /// found immediately after insertion.
    #[test]
    fn test_sbbf_insert_and_check() {
        let item_count = 1_000_000;
        let mut filter = BloomFilter::with_item_count(10 * 1024, item_count, 0);
        for i in 0..item_count as u64 {
            filter.add(i);
            assert!(filter.find(i));
        }
    }

    /// A filter serialized with `to_bytes` and restored with `from_bytes`
    /// must hold the same contents and still find every inserted value.
    #[test]
    fn test_encode_and_decode() {
        let hashes: Vec<_> = (0..500000).collect();

        let mut filter = BloomFilter::with_params(10 * 1024, 1, 0);
        for hash in hashes.iter() {
            filter.add(*hash);
        }
        assert!(hashes.iter().all(|hash| filter.find(*hash)));

        let buf = filter.to_bytes().unwrap();
        let (decode_filter, _) = BloomFilter::from_bytes(&buf).unwrap();

        // `zip` stops at the shorter side, so compare the element counts
        // first; otherwise a truncated decode would go unnoticed.
        assert_eq!(
            filter.filter.iter().count(),
            decode_filter.filter.iter().count()
        );
        for (a, b) in filter.filter.iter().zip(decode_filter.filter.iter()) {
            assert_eq!(a, b);
        }

        assert!(hashes.iter().all(|hash| decode_filter.find(*hash)));
    }

    /// Spot-checks `optimal_k` against hand-computed values of
    /// `ceil((filter_size / item_count) * ln 2)`, clamped to at least 1.
    #[test]
    fn test_optimal_k() {
        assert_eq!(BloomFilter::optimal_k(1000, 100), 7); // (1000/100)*ln(2) ≈ 6.93 → ceil → 7
        assert_eq!(BloomFilter::optimal_k(1024, 128), 6); // (1024/128)*ln(2) ≈ 5.545 → ceil → 6
        assert_eq!(BloomFilter::optimal_k(100, 1000), 1); // (100/1000)*ln(2) ≈ 0.069 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(100, 100), 1); // (100/100)*ln(2) ≈ 0.693 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(1, 1), 1); // (1/1)*ln(2) ≈ 0.693 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(1, 1000), 1); // (1/1000)*ln(2) ≈ 0.0007 → ceil → 1
        assert_eq!(BloomFilter::optimal_k(100, 50), 2); // (100/50)*ln(2) ≈ 1.386 → ceil → 2
        assert_eq!(BloomFilter::optimal_k(101, 50), 2); // (101/50)*ln(2) ≈ 1.400 → ceil → 2
        assert_eq!(BloomFilter::optimal_k(1_000_000, 10_000), 70); // (1e6/1e4)*ln(2) ≈ 69.31 → ceil → 70
    }
}
0 commit comments