@@ -62,7 +62,7 @@ const UNSET_CHECKING_FLAG_MASK: u64 = 0x7FFFFFFFFFFFFFFF;
62
62
/// ### Checking flag
63
63
///
64
64
/// It is possible that rows with same hash values exist in `input cols`.
65
- /// And if we `vectorized compare ` and `vectorized append` them
65
+ /// And if we perform `vectorized_equal_to` and `vectorized append` on them
66
66
/// in the same round, some fault cases will occur especially when
67
67
/// they are totally the repeated rows...
68
68
///
@@ -71,7 +71,7 @@ const UNSET_CHECKING_FLAG_MASK: u64 = 0x7FFFFFFFFFFFFFFF;
71
71
///
72
72
/// - We found their hash values equal to one exist group
73
73
///
74
- /// - We then perform `vectorized compare ` for them to the exist group,
74
+ /// - We then perform `vectorized_equal_to` between them and the existing group,
75
75
/// and found their values not equal to the exist one
76
76
///
77
77
/// - Finally when perform `vectorized append`, we decide to build two
@@ -153,14 +153,14 @@ pub struct GroupValuesColumn {
153
153
/// is used to store the rows will be processed in next round.
154
154
remaining_indices : Vec < usize > ,
155
155
156
- /// The `vectorized compared ` row indices buffer
157
- vectorized_compare_row_indices : Vec < usize > ,
156
+ /// The `vectorized_equal_to` row indices buffer
157
+ vectorized_equal_to_row_indices : Vec < usize > ,
158
158
159
- /// The `vectorized compared ` group indices buffer
160
- vectorized_compare_group_indices : Vec < usize > ,
159
+ /// The `vectorized_equal_to` group indices buffer
160
+ vectorized_equal_to_group_indices : Vec < usize > ,
161
161
162
- /// The `vectorized compared ` result buffer
163
- vectorized_compare_results : Vec < bool > ,
162
+ /// The `vectorized_equal_to` result buffer
163
+ vectorized_equal_to_results : Vec < bool > ,
164
164
165
165
/// The `vectorized append` row indices buffer
166
166
vectorized_append_row_indices : Vec < usize > ,
@@ -204,9 +204,9 @@ impl GroupValuesColumn {
204
204
append_rows_buffer : Default :: default ( ) ,
205
205
current_indices : Default :: default ( ) ,
206
206
remaining_indices : Default :: default ( ) ,
207
- vectorized_compare_row_indices : Default :: default ( ) ,
208
- vectorized_compare_group_indices : Default :: default ( ) ,
209
- vectorized_compare_results : Default :: default ( ) ,
207
+ vectorized_equal_to_row_indices : Default :: default ( ) ,
208
+ vectorized_equal_to_group_indices : Default :: default ( ) ,
209
+ vectorized_equal_to_results : Default :: default ( ) ,
210
210
vectorized_append_row_indices : Default :: default ( ) ,
211
211
} )
212
212
}
@@ -260,8 +260,8 @@ impl GroupValuesColumn {
260
260
/// - Check if the `bucket` checking, if so add it to `remaining_indices`,
261
261
/// and just process it in next round, otherwise we continue the process
262
262
/// - Mark `bucket` checking, and add it to `checking_buckets`
263
- /// - Add row index to `vectorized_compare_row_indices `
264
- /// - Add group indices(from `group_index_lists`) to `vectorized_compare_group_indices `
263
+ /// - Add row index to `vectorized_equal_to_row_indices `
264
+ /// - Add group indices(from `group_index_lists`) to `vectorized_equal_to_group_indices `
265
265
///
266
266
fn collect_vectorized_process_context ( & mut self , batch_hashes : & [ u64 ] ) {
267
267
let mut next_group_idx = self . group_values [ 0 ] . len ( ) as u64 ;
@@ -308,21 +308,36 @@ impl GroupValuesColumn {
308
308
// Mark `bucket` checking, and add it to `checking_buckets`
309
309
bucket_ctx. set_checking ( ) ;
310
310
311
- // Add row index to `vectorized_compare_row_indices `
312
- // Add group indices(from `group_index_lists`) to `vectorized_compare_group_indices `
311
+ // Add row index to `vectorized_equal_to_row_indices `
312
+ // Add group indices(from `group_index_lists`) to `vectorized_equal_to_group_indices `
313
313
let mut next_group_index = bucket_ctx. group_index ( ) as usize + 1 ;
314
314
while next_group_index > 0 {
315
315
let current_group_index = next_group_index;
316
- self . vectorized_compare_row_indices . push ( row) ;
317
- self . vectorized_compare_group_indices
316
+ self . vectorized_equal_to_row_indices . push ( row) ;
317
+ self . vectorized_equal_to_group_indices
318
318
. push ( current_group_index - 1 ) ;
319
319
next_group_index = self . group_index_lists [ current_group_index] ;
320
320
}
321
321
}
322
- }
323
322
324
- fn vectorized_compare ( & mut self ) {
323
+ self . vectorized_equal_to_results
324
+ . resize ( self . vectorized_equal_to_group_indices . len ( ) , true ) ;
325
+ }
325
326
327
+ /// Perform `vectorized_equal_to` for each group column in turn, sharing one results buffer
328
+ ///
329
+ ///
330
+ fn vectorized_equal_to ( & mut self , cols : & [ ArrayRef ] ) {
331
+ let mut equal_to_results = mem:: take ( & mut self . vectorized_equal_to_results ) ;
332
+ for ( col_idx, group_col) in self . group_values . iter ( ) . enumerate ( ) {
333
+ group_col. vectorized_equal_to (
334
+ & self . vectorized_equal_to_group_indices ,
335
+ & cols[ col_idx] ,
336
+ & self . vectorized_equal_to_row_indices ,
337
+ & mut equal_to_results,
338
+ ) ;
339
+ }
340
+ self . vectorized_equal_to_results = equal_to_results;
326
341
}
327
342
}
328
343
@@ -411,25 +426,25 @@ impl GroupValues for GroupValuesColumn {
411
426
batch_hashes. resize ( n_rows, 0 ) ;
412
427
create_hashes ( cols, & self . random_state , & mut batch_hashes) ?;
413
428
414
- // General steps for one round `vectorized compare & append`:
429
+ // General steps for one round of `vectorized_equal_to` & `vectorized_append`:
415
430
// 1. Collect vectorized context by checking hash values of `cols` in `map`
416
- // 2. Perform `vectorized compare `
417
- // 3. Perform `vectorized append `
431
+ // 2. Perform `vectorized_equal_to `
432
+ // 3. Perform `vectorized_append `
418
433
// 4. Reset the checking flag in `BucketContext`
419
434
420
435
let num_rows = cols[ 0 ] . len ( ) ;
421
436
self . current_indices . clear ( ) ;
422
437
self . current_indices . extend ( 0 ..num_rows) ;
423
438
while self . current_indices . len ( ) > 0 {
424
439
self . vectorized_append_row_indices . clear ( ) ;
425
- self . vectorized_compare_row_indices . clear ( ) ;
426
- self . vectorized_compare_group_indices . clear ( ) ;
427
- self . vectorized_compare_results . clear ( ) ;
440
+ self . vectorized_equal_to_row_indices . clear ( ) ;
441
+ self . vectorized_equal_to_group_indices . clear ( ) ;
442
+ self . vectorized_equal_to_results . clear ( ) ;
428
443
429
444
// 1. Collect vectorized context by checking hash values of `cols` in `map`
430
445
self . collect_vectorized_process_context ( & batch_hashes) ;
431
446
432
- // 2. Perform `vectorized compare `
447
+ // 2. Perform `vectorized_equal_to `
433
448
}
434
449
435
450
self . hashes_buffer = batch_hashes;
0 commit comments