@@ -250,68 +250,130 @@ impl HashJoinBuildState {
250
250
if self . hash_join_state . hash_join_desc . join_type == JoinType :: Cross {
251
251
return Ok ( ( ) ) ;
252
252
}
253
+ let skip_duplicates = matches ! (
254
+ self . hash_join_state. hash_join_desc. join_type,
255
+ JoinType :: InnerAny | JoinType :: LeftAny
256
+ ) ;
253
257
254
258
// Divide the finalize phase into multiple tasks.
255
259
self . generate_finalize_task ( ) ?;
256
260
257
261
// Create a fixed size hash table.
258
- let ( hash_join_hash_table, entry_size) = match self . method . clone ( ) {
259
- HashMethodKind :: Serializer ( _) => (
262
+ let ( hash_join_hash_table, entry_size) = match ( self . method . clone ( ) , skip_duplicates ) {
263
+ ( HashMethodKind :: Serializer ( _) , false ) => (
260
264
HashJoinHashTable :: Serializer ( SerializerHashJoinHashTable :: new (
261
265
BinaryHashJoinHashMap :: with_build_row_num ( build_num_rows) ,
262
266
HashMethodSerializer :: default ( ) ,
263
267
) ) ,
264
268
std:: mem:: size_of :: < StringRawEntry > ( ) ,
265
269
) ,
266
- HashMethodKind :: SingleBinary ( _) => (
270
+ ( HashMethodKind :: Serializer ( _) , true ) => (
271
+ HashJoinHashTable :: SkipDuplicatesSerializer ( SerializerHashJoinHashTable :: new (
272
+ BinaryHashJoinHashMap :: with_build_row_num ( build_num_rows) ,
273
+ HashMethodSerializer :: default ( ) ,
274
+ ) ) ,
275
+ std:: mem:: size_of :: < StringRawEntry > ( ) ,
276
+ ) ,
277
+ ( HashMethodKind :: SingleBinary ( _) , false ) => (
267
278
HashJoinHashTable :: SingleBinary ( SingleBinaryHashJoinHashTable :: new (
268
279
BinaryHashJoinHashMap :: with_build_row_num ( build_num_rows) ,
269
280
HashMethodSingleBinary :: default ( ) ,
270
281
) ) ,
271
282
std:: mem:: size_of :: < StringRawEntry > ( ) ,
272
283
) ,
273
- HashMethodKind :: KeysU8 ( hash_method) => (
284
+ ( HashMethodKind :: SingleBinary ( _) , true ) => (
285
+ HashJoinHashTable :: SkipDuplicatesSingleBinary (
286
+ SingleBinaryHashJoinHashTable :: new (
287
+ BinaryHashJoinHashMap :: with_build_row_num ( build_num_rows) ,
288
+ HashMethodSingleBinary :: default ( ) ,
289
+ ) ,
290
+ ) ,
291
+ std:: mem:: size_of :: < StringRawEntry > ( ) ,
292
+ ) ,
293
+ ( HashMethodKind :: KeysU8 ( hash_method) , false ) => (
274
294
HashJoinHashTable :: KeysU8 ( FixedKeyHashJoinHashTable :: new (
275
295
HashJoinHashMap :: < u8 > :: with_build_row_num ( build_num_rows) ,
276
296
hash_method,
277
297
) ) ,
278
298
std:: mem:: size_of :: < RawEntry < u8 > > ( ) ,
279
299
) ,
280
- HashMethodKind :: KeysU16 ( hash_method) => (
300
+ ( HashMethodKind :: KeysU8 ( hash_method) , true ) => (
301
+ HashJoinHashTable :: SkipDuplicatesKeysU8 ( FixedKeyHashJoinHashTable :: new (
302
+ HashJoinHashMap :: < u8 > :: with_build_row_num ( build_num_rows) ,
303
+ hash_method,
304
+ ) ) ,
305
+ std:: mem:: size_of :: < RawEntry < u8 > > ( ) ,
306
+ ) ,
307
+ ( HashMethodKind :: KeysU16 ( hash_method) , false ) => (
281
308
HashJoinHashTable :: KeysU16 ( FixedKeyHashJoinHashTable :: new (
282
309
HashJoinHashMap :: < u16 > :: with_build_row_num ( build_num_rows) ,
283
310
hash_method,
284
311
) ) ,
285
312
std:: mem:: size_of :: < RawEntry < u16 > > ( ) ,
286
313
) ,
287
- HashMethodKind :: KeysU32 ( hash_method) => (
314
+ ( HashMethodKind :: KeysU16 ( hash_method) , true ) => (
315
+ HashJoinHashTable :: SkipDuplicatesKeysU16 ( FixedKeyHashJoinHashTable :: new (
316
+ HashJoinHashMap :: < u16 > :: with_build_row_num ( build_num_rows) ,
317
+ hash_method,
318
+ ) ) ,
319
+ std:: mem:: size_of :: < RawEntry < u16 > > ( ) ,
320
+ ) ,
321
+ ( HashMethodKind :: KeysU32 ( hash_method) , false ) => (
288
322
HashJoinHashTable :: KeysU32 ( FixedKeyHashJoinHashTable :: new (
289
323
HashJoinHashMap :: < u32 > :: with_build_row_num ( build_num_rows) ,
290
324
hash_method,
291
325
) ) ,
292
326
std:: mem:: size_of :: < RawEntry < u32 > > ( ) ,
293
327
) ,
294
- HashMethodKind :: KeysU64 ( hash_method) => (
328
+ ( HashMethodKind :: KeysU32 ( hash_method) , true ) => (
329
+ HashJoinHashTable :: SkipDuplicatesKeysU32 ( FixedKeyHashJoinHashTable :: new (
330
+ HashJoinHashMap :: < u32 > :: with_build_row_num ( build_num_rows) ,
331
+ hash_method,
332
+ ) ) ,
333
+ std:: mem:: size_of :: < RawEntry < u32 > > ( ) ,
334
+ ) ,
335
+ ( HashMethodKind :: KeysU64 ( hash_method) , false ) => (
295
336
HashJoinHashTable :: KeysU64 ( FixedKeyHashJoinHashTable :: new (
296
337
HashJoinHashMap :: < u64 > :: with_build_row_num ( build_num_rows) ,
297
338
hash_method,
298
339
) ) ,
299
340
std:: mem:: size_of :: < RawEntry < u64 > > ( ) ,
300
341
) ,
301
- HashMethodKind :: KeysU128 ( hash_method) => (
342
+ ( HashMethodKind :: KeysU64 ( hash_method) , true ) => (
343
+ HashJoinHashTable :: SkipDuplicatesKeysU64 ( FixedKeyHashJoinHashTable :: new (
344
+ HashJoinHashMap :: < u64 > :: with_build_row_num ( build_num_rows) ,
345
+ hash_method,
346
+ ) ) ,
347
+ std:: mem:: size_of :: < RawEntry < u64 > > ( ) ,
348
+ ) ,
349
+ ( HashMethodKind :: KeysU128 ( hash_method) , false ) => (
302
350
HashJoinHashTable :: KeysU128 ( FixedKeyHashJoinHashTable :: new (
303
351
HashJoinHashMap :: < u128 > :: with_build_row_num ( build_num_rows) ,
304
352
hash_method,
305
353
) ) ,
306
354
std:: mem:: size_of :: < RawEntry < u128 > > ( ) ,
307
355
) ,
308
- HashMethodKind :: KeysU256 ( hash_method) => (
356
+ ( HashMethodKind :: KeysU128 ( hash_method) , true ) => (
357
+ HashJoinHashTable :: SkipDuplicatesKeysU128 ( FixedKeyHashJoinHashTable :: new (
358
+ HashJoinHashMap :: < u128 > :: with_build_row_num ( build_num_rows) ,
359
+ hash_method,
360
+ ) ) ,
361
+ std:: mem:: size_of :: < RawEntry < u128 > > ( ) ,
362
+ ) ,
363
+ ( HashMethodKind :: KeysU256 ( hash_method) , false ) => (
309
364
HashJoinHashTable :: KeysU256 ( FixedKeyHashJoinHashTable :: new (
310
365
HashJoinHashMap :: < U256 > :: with_build_row_num ( build_num_rows) ,
311
366
hash_method,
312
367
) ) ,
313
368
std:: mem:: size_of :: < RawEntry < U256 > > ( ) ,
314
369
) ,
370
+ ( HashMethodKind :: KeysU256 ( hash_method) , true ) => (
371
+ HashJoinHashTable :: SkipDuplicatesKeysU256 ( FixedKeyHashJoinHashTable :: new (
372
+ HashJoinHashMap :: < U256 > :: with_build_row_num ( build_num_rows) ,
373
+ hash_method,
374
+ ) ) ,
375
+ std:: mem:: size_of :: < RawEntry < U256 > > ( ) ,
376
+ ) ,
315
377
} ;
316
378
self . entry_size . store ( entry_size, Ordering :: Release ) ;
317
379
let hash_table = unsafe { & mut * self . hash_join_state . hash_table . get ( ) } ;
@@ -378,7 +440,7 @@ impl HashJoinBuildState {
378
440
next: 0 ,
379
441
}
380
442
}
381
- $table. insert( * key, raw_entry_ptr, $skip_duplicates ) ;
443
+ $table. insert:: <$skip_duplicates> ( * key, raw_entry_ptr) ;
382
444
raw_entry_ptr = unsafe { raw_entry_ptr. add( 1 ) } ;
383
445
}
384
446
}
@@ -398,7 +460,7 @@ impl HashJoinBuildState {
398
460
next: 0 ,
399
461
}
400
462
}
401
- $table. insert( * key, raw_entry_ptr, $skip_duplicates ) ;
463
+ $table. insert:: <$skip_duplicates> ( * key, raw_entry_ptr) ;
402
464
raw_entry_ptr = unsafe { raw_entry_ptr. add( 1 ) } ;
403
465
}
404
466
}
@@ -469,7 +531,7 @@ impl HashJoinBuildState {
469
531
string_local_space_ptr = string_local_space_ptr. add( key. len( ) ) ;
470
532
}
471
533
472
- $table. insert( key, raw_entry_ptr, $skip_duplicates ) ;
534
+ $table. insert:: <$skip_duplicates> ( key, raw_entry_ptr) ;
473
535
raw_entry_ptr = unsafe { raw_entry_ptr. add( 1 ) } ;
474
536
}
475
537
}
@@ -502,7 +564,7 @@ impl HashJoinBuildState {
502
564
string_local_space_ptr = string_local_space_ptr. add( key. len( ) ) ;
503
565
}
504
566
505
- $table. insert( key, raw_entry_ptr, $skip_duplicates ) ;
567
+ $table. insert:: <$skip_duplicates> ( key, raw_entry_ptr) ;
506
568
raw_entry_ptr = unsafe { raw_entry_ptr. add( 1 ) } ;
507
569
}
508
570
}
@@ -627,10 +689,6 @@ impl HashJoinBuildState {
627
689
}
628
690
_ => { }
629
691
} ;
630
- let skip_duplicates = matches ! (
631
- self . hash_join_state. hash_join_desc. join_type,
632
- JoinType :: InnerAny | JoinType :: LeftAny
633
- ) ;
634
692
635
693
keys_entries
636
694
. iter_mut ( )
@@ -641,34 +699,58 @@ impl HashJoinBuildState {
641
699
642
700
match hashtable {
643
701
HashJoinHashTable :: Serializer ( table) => insert_binary_key ! {
644
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, skip_duplicates ,
702
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, false ,
645
703
} ,
646
704
HashJoinHashTable :: SingleBinary ( table) => insert_binary_key ! {
647
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, skip_duplicates ,
705
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, false ,
648
706
} ,
649
707
HashJoinHashTable :: KeysU8 ( table) => insert_key ! {
650
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u8 , skip_duplicates ,
708
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u8 , false ,
651
709
} ,
652
710
HashJoinHashTable :: KeysU16 ( table) => insert_key ! {
653
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u16 , skip_duplicates ,
711
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u16 , false ,
654
712
} ,
655
713
HashJoinHashTable :: KeysU32 ( table) => insert_key ! {
656
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u32 , skip_duplicates ,
714
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u32 , false ,
657
715
} ,
658
716
HashJoinHashTable :: KeysU64 ( table) => insert_key ! {
659
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u64 , skip_duplicates ,
717
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u64 , false ,
660
718
} ,
661
719
HashJoinHashTable :: KeysU128 ( table) => insert_key ! {
662
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u128 , skip_duplicates ,
720
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u128 , false ,
663
721
} ,
664
722
HashJoinHashTable :: KeysU256 ( table) => insert_key ! {
665
- & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, U256 , skip_duplicates ,
723
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, U256 , false ,
666
724
} ,
667
725
HashJoinHashTable :: Null => {
668
726
return Err ( ErrorCode :: AbortedQuery (
669
727
"Aborted query, because the hash table is uninitialized." ,
670
728
) ) ;
671
729
}
730
+ HashJoinHashTable :: SkipDuplicatesSerializer ( table) => insert_binary_key ! {
731
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, true ,
732
+ } ,
733
+ HashJoinHashTable :: SkipDuplicatesSingleBinary ( table) => insert_binary_key ! {
734
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, true ,
735
+ } ,
736
+ HashJoinHashTable :: SkipDuplicatesKeysU8 ( table) => insert_key ! {
737
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u8 , true ,
738
+ } ,
739
+ HashJoinHashTable :: SkipDuplicatesKeysU16 ( table) => insert_key ! {
740
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u16 , true ,
741
+ } ,
742
+ HashJoinHashTable :: SkipDuplicatesKeysU32 ( table) => insert_key ! {
743
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u32 , true ,
744
+ } ,
745
+ HashJoinHashTable :: SkipDuplicatesKeysU64 ( table) => insert_key ! {
746
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u64 , true ,
747
+ } ,
748
+ HashJoinHashTable :: SkipDuplicatesKeysU128 ( table) => insert_key ! {
749
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, u128 , true ,
750
+ } ,
751
+ HashJoinHashTable :: SkipDuplicatesKeysU256 ( table) => insert_key ! {
752
+ & mut table. hash_table, & table. hash_method, chunk, build_keys, valids, chunk_index as u32 , entry_size, & mut local_raw_entry_spaces, U256 , true ,
753
+ } ,
672
754
}
673
755
674
756
{
0 commit comments