@@ -15,6 +15,7 @@
 use std::alloc::Allocator;
 use std::marker::PhantomData;
 use std::sync::atomic::AtomicU64;
+use std::sync::atomic::AtomicUsize;
 use std::sync::atomic::Ordering;

 use databend_common_base::hints::assume;
@@ -102,18 +103,31 @@ pub fn hash_bits() -> u32 {
     }
 }

-pub struct HashJoinHashTable<K: Keyable, A: Allocator + Clone = DefaultAllocator> {
+pub struct HashJoinHashTable<
+    K: Keyable,
+    const SKIP_DUPLICATES: bool = false,
+    A: Allocator + Clone = DefaultAllocator,
+> {
     pub(crate) pointers: Box<[u64], A>,
     pub(crate) atomic_pointers: *mut AtomicU64,
     pub(crate) hash_shift: usize,
     pub(crate) phantom: PhantomData<K>,
+    pub(crate) count: AtomicUsize,
 }

-unsafe impl<K: Keyable + Send, A: Allocator + Clone + Send> Send for HashJoinHashTable<K, A> {}
+unsafe impl<K: Keyable + Send, A: Allocator + Clone + Send, const SKIP_DUPLICATES: bool> Send
+    for HashJoinHashTable<K, SKIP_DUPLICATES, A>
+{
+}

-unsafe impl<K: Keyable + Sync, A: Allocator + Clone + Sync> Sync for HashJoinHashTable<K, A> {}
+unsafe impl<K: Keyable + Sync, A: Allocator + Clone + Sync, const SKIP_DUPLICATES: bool> Sync
+    for HashJoinHashTable<K, SKIP_DUPLICATES, A>
+{
+}

-impl<K: Keyable, A: Allocator + Clone + Default> HashJoinHashTable<K, A> {
+impl<K: Keyable, A: Allocator + Clone + Default + 'static, const SKIP_DUPLICATES: bool>
+    HashJoinHashTable<K, SKIP_DUPLICATES, A>
+{
     pub fn with_build_row_num(row_num: usize) -> Self {
         let capacity = std::cmp::max((row_num * 2).next_power_of_two(), 1 << 10);
         let mut hashtable = Self {
@@ -123,6 +137,7 @@ impl<K: Keyable, A: Allocator + Clone + Default> HashJoinHashTable<K, A> {
             atomic_pointers: std::ptr::null_mut(),
             hash_shift: (hash_bits() - capacity.trailing_zeros()) as usize,
             phantom: PhantomData,
+            count: Default::default(),
         };
         hashtable.atomic_pointers = unsafe {
             std::mem::transmute::<*mut u64, *mut AtomicU64>(hashtable.pointers.as_mut_ptr())
@@ -138,6 +153,12 @@ impl<K: Keyable, A: Allocator + Clone + Default> HashJoinHashTable<K, A> {
         // `index` is less than the capacity of hash table.
         let mut old_header = unsafe { (*self.atomic_pointers.add(index)).load(Ordering::Relaxed) };
         loop {
+            if SKIP_DUPLICATES
+                && early_filtering(old_header, hash)
+                && self.next_contains(&key, remove_header_tag(old_header))
+            {
+                return;
+            }
             let res = unsafe {
                 (*self.atomic_pointers.add(index)).compare_exchange_weak(
                     old_header,
@@ -151,11 +172,13 @@ impl<K: Keyable, A: Allocator + Clone + Default> HashJoinHashTable<K, A> {
                 Err(x) => old_header = x,
             };
         }
+        self.count.fetch_add(1, Ordering::Relaxed);
         unsafe { (*entry_ptr).next = remove_header_tag(old_header) };
     }
 }

-impl<K, A> HashJoinHashtableLike for HashJoinHashTable<K, A>
+impl<K, A, const SKIP_DUPLICATES: bool> HashJoinHashtableLike
+    for HashJoinHashTable<K, SKIP_DUPLICATES, A>
 where
     K: Keyable,
     A: Allocator + Clone + 'static,
@@ -373,4 +396,8 @@ where
         }
         0
     }
+
+    fn len(&self) -> usize {
+        self.count.load(Ordering::Relaxed)
+    }
 }
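The diff threads a `const SKIP_DUPLICATES: bool` parameter through `HashJoinHashTable` and adds an atomic `count` field that backs the new `len` method: when the flag is on, `insert` checks the bucket chain via `next_contains` and returns early instead of linking a duplicate key. The snippet below is a minimal, self-contained sketch of that const-generic pattern only; `ToyChain` and its single-threaded `Vec` storage are illustrative stand-ins, not part of databend's hashtable.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Illustrative sketch (not databend's API): a const generic `SKIP_DUPLICATES`
/// gates duplicate insertion at compile time, and an atomic counter backs `len()`.
struct ToyChain<const SKIP_DUPLICATES: bool> {
    rows: Vec<u64>,
    // Counterpart of the new `count` field; atomic because the real table is
    // filled concurrently, even though this toy is single-threaded.
    count: AtomicUsize,
}

impl<const SKIP_DUPLICATES: bool> ToyChain<SKIP_DUPLICATES> {
    fn new() -> Self {
        Self {
            rows: Vec::new(),
            count: AtomicUsize::new(0),
        }
    }

    fn insert(&mut self, key: u64) {
        // With `SKIP_DUPLICATES = true` an existing key returns early, analogous
        // to the `next_contains` check in the patch; with `false` the branch is
        // compiled out and every key is appended.
        if SKIP_DUPLICATES && self.rows.contains(&key) {
            return;
        }
        self.rows.push(key);
        self.count.fetch_add(1, Ordering::Relaxed);
    }

    // Counterpart of the new `len` method: a relaxed load of the counter.
    fn len(&self) -> usize {
        self.count.load(Ordering::Relaxed)
    }
}

fn main() {
    let mut dedup = ToyChain::<true>::new();
    let mut keep_all = ToyChain::<false>::new();
    for key in [1_u64, 2, 2, 3] {
        dedup.insert(key);
        keep_all.insert(key);
    }
    assert_eq!(dedup.len(), 3);
    assert_eq!(keep_all.len(), 4);
}
```

Because the flag is a const generic rather than a runtime field, the non-deduplicating build path pays no branch cost, while callers that only need key existence (for example duplicate-insensitive joins) can opt in with a different type parameter.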