Skip to content

Commit 1a7c2eb

Browse files
committed
impl vectorized_equal_to.
1 parent dad79c0 commit 1a7c2eb

File tree

2 files changed

+56
-41
lines changed

2 files changed

+56
-41
lines changed

datafusion/physical-plan/src/aggregates/group_values/column.rs

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ const UNSET_CHECKING_FLAG_MASK: u64 = 0x7FFFFFFFFFFFFFFF;
6262
/// ### Checking flag
6363
///
6464
/// It is possible that rows with same hash values exist in `input cols`.
65-
/// And if we `vectorized compare` and `vectorized append` them
65+
/// And if we perform `vectorized_equal_to` and `vectorized append` on them
6666
/// in the same round, some fault cases will occur especially when
6767
/// they are totally the repeated rows...
6868
///
@@ -71,7 +71,7 @@ const UNSET_CHECKING_FLAG_MASK: u64 = 0x7FFFFFFFFFFFFFFF;
7171
///
7272
/// - We find that their hash values are equal to that of an existing group
7373
///
74-
/// - We then perform `vectorized compare` for them to the exist group,
74+
/// - We then perform `vectorized_equal_to` on them against the existing group,
7575
/// and find that their values are not equal to the existing one
7676
///
7777
/// - Finally when perform `vectorized append`, we decide to build two
@@ -153,14 +153,14 @@ pub struct GroupValuesColumn {
153153
/// is used to store the rows that will be processed in the next round.
154154
remaining_indices: Vec<usize>,
155155

156-
/// The `vectorized compared` row indices buffer
157-
vectorized_compare_row_indices: Vec<usize>,
156+
/// The `vectorized_equal_to` row indices buffer
157+
vectorized_equal_to_row_indices: Vec<usize>,
158158

159-
/// The `vectorized compared` group indices buffer
160-
vectorized_compare_group_indices: Vec<usize>,
159+
/// The `vectorized_equal_to` group indices buffer
160+
vectorized_equal_to_group_indices: Vec<usize>,
161161

162-
/// The `vectorized compared` result buffer
163-
vectorized_compare_results: Vec<bool>,
162+
/// The `vectorized_equal_to` result buffer
163+
vectorized_equal_to_results: Vec<bool>,
164164

165165
/// The `vectorized append` row indices buffer
166166
vectorized_append_row_indices: Vec<usize>,
@@ -204,9 +204,9 @@ impl GroupValuesColumn {
204204
append_rows_buffer: Default::default(),
205205
current_indices: Default::default(),
206206
remaining_indices: Default::default(),
207-
vectorized_compare_row_indices: Default::default(),
208-
vectorized_compare_group_indices: Default::default(),
209-
vectorized_compare_results: Default::default(),
207+
vectorized_equal_to_row_indices: Default::default(),
208+
vectorized_equal_to_group_indices: Default::default(),
209+
vectorized_equal_to_results: Default::default(),
210210
vectorized_append_row_indices: Default::default(),
211211
})
212212
}
@@ -260,8 +260,8 @@ impl GroupValuesColumn {
260260
/// - Check whether the `bucket` is marked as checking; if so, add it to `remaining_indices`,
261261
/// and just process it in next round, otherwise we continue the process
262262
/// - Mark `bucket` checking, and add it to `checking_buckets`
263-
/// - Add row index to `vectorized_compare_row_indices`
264-
/// - Add group indices(from `group_index_lists`) to `vectorized_compare_group_indices`
263+
/// - Add row index to `vectorized_equal_to_row_indices`
264+
/// - Add group indices(from `group_index_lists`) to `vectorized_equal_to_group_indices`
265265
///
266266
fn collect_vectorized_process_context(&mut self, batch_hashes: &[u64]) {
267267
let mut next_group_idx = self.group_values[0].len() as u64;
@@ -308,21 +308,36 @@ impl GroupValuesColumn {
308308
// Mark `bucket` checking, and add it to `checking_buckets`
309309
bucket_ctx.set_checking();
310310

311-
// Add row index to `vectorized_compare_row_indices`
312-
// Add group indices(from `group_index_lists`) to `vectorized_compare_group_indices`
311+
// Add row index to `vectorized_equal_to_row_indices`
312+
// Add group indices(from `group_index_lists`) to `vectorized_equal_to_group_indices`
313313
let mut next_group_index = bucket_ctx.group_index() as usize + 1;
314314
while next_group_index > 0 {
315315
let current_group_index = next_group_index;
316-
self.vectorized_compare_row_indices.push(row);
317-
self.vectorized_compare_group_indices
316+
self.vectorized_equal_to_row_indices.push(row);
317+
self.vectorized_equal_to_group_indices
318318
.push(current_group_index - 1);
319319
next_group_index = self.group_index_lists[current_group_index];
320320
}
321321
}
322-
}
323322

324-
fn vectorized_compare(&mut self) {
323+
self.vectorized_equal_to_results
324+
.resize(self.vectorized_equal_to_group_indices.len(), true);
325+
}
325326

327+
/// Perform `vectorized_equal_to`
328+
///
329+
/// Checks every collected (group index, row index) pair for equality, one group column at a time.
330+
fn vectorized_equal_to(&mut self, cols: &[ArrayRef]) {
331+
let mut equal_to_results = mem::take(&mut self.vectorized_equal_to_results);
332+
for (col_idx, group_col) in self.group_values.iter().enumerate() {
333+
group_col.vectorized_equal_to(
334+
&self.vectorized_equal_to_group_indices,
335+
&cols[col_idx],
336+
&self.vectorized_equal_to_row_indices,
337+
&mut equal_to_results,
338+
);
339+
}
340+
self.vectorized_equal_to_results = equal_to_results;
326341
}
327342
}
328343

@@ -411,25 +426,25 @@ impl GroupValues for GroupValuesColumn {
411426
batch_hashes.resize(n_rows, 0);
412427
create_hashes(cols, &self.random_state, &mut batch_hashes)?;
413428

414-
// General steps for one round `vectorized compare & append`:
429+
// General steps for one round of `vectorized_equal_to` & `vectorized_append`:
415430
// 1. Collect vectorized context by checking hash values of `cols` in `map`
416-
// 2. Perform `vectorized compare`
417-
// 3. Perform `vectorized append`
431+
// 2. Perform `vectorized_equal_to`
432+
// 3. Perform `vectorized_append`
418433
// 4. Reset the checking flag in `BucketContext`
419434

420435
let num_rows = cols[0].len();
421436
self.current_indices.clear();
422437
self.current_indices.extend(0..num_rows);
423438
while self.current_indices.len() > 0 {
424439
self.vectorized_append_row_indices.clear();
425-
self.vectorized_compare_row_indices.clear();
426-
self.vectorized_compare_group_indices.clear();
427-
self.vectorized_compare_results.clear();
440+
self.vectorized_equal_to_row_indices.clear();
441+
self.vectorized_equal_to_group_indices.clear();
442+
self.vectorized_equal_to_results.clear();
428443

429444
// 1. Collect vectorized context by checking hash values of `cols` in `map`
430445
self.collect_vectorized_process_context(&batch_hashes);
431446

432-
// 2. Perform `vectorized compare`
447+
// 2. Perform `vectorized_equal_to`
433448
}
434449

435450
self.hashes_buffer = batch_hashes;

datafusion/physical-plan/src/aggregates/group_values/group_column.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@ pub trait GroupColumn: Send + Sync {
6262
/// Appends the row at `row` in `array` to this builder
6363
fn append_val(&mut self, array: &ArrayRef, row: usize);
6464

65-
fn vectorized_compare(
66-
&mut self,
65+
fn vectorized_equal_to(
66+
&self,
6767
group_indices: &[usize],
6868
array: &ArrayRef,
6969
rows: &[usize],
70-
compare_results: &mut [bool],
70+
equal_to_results: &mut [bool],
7171
);
7272

7373
fn vectorized_append(&mut self, array: &ArrayRef, rows: &[usize], all_non_null: bool);
@@ -142,18 +142,18 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
142142
}
143143
}
144144

145-
fn vectorized_compare(
146-
&mut self,
145+
fn vectorized_equal_to(
146+
&self,
147147
group_indices: &[usize],
148148
array: &ArrayRef,
149149
rows: &[usize],
150-
compare_results: &mut [bool],
150+
equal_to_results: &mut [bool],
151151
) {
152152
let array = array.as_primitive::<T>();
153153

154154
for (idx, &lhs_row) in group_indices.iter().enumerate() {
155155
// Already found to be not equal, so there is no need to check again
156-
if !compare_results[idx] {
156+
if !equal_to_results[idx] {
157157
continue;
158158
}
159159

@@ -163,13 +163,13 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
163163
let exist_null = self.nulls.is_null(lhs_row);
164164
let input_null = array.is_null(rhs_row);
165165
if let Some(result) = nulls_equal_to(exist_null, input_null) {
166-
compare_results[idx] = result;
166+
equal_to_results[idx] = result;
167167
continue;
168168
}
169169
// Otherwise, we need to check their values
170170
}
171171

172-
compare_results[idx] = self.group_values[lhs_row] == array.value(rhs_row);
172+
equal_to_results[idx] = self.group_values[lhs_row] == array.value(rhs_row);
173173
}
174174
}
175175

@@ -404,12 +404,12 @@ where
404404
};
405405
}
406406

407-
fn vectorized_compare(
408-
&mut self,
407+
fn vectorized_equal_to(
408+
&self,
409409
group_indices: &[usize],
410410
array: &ArrayRef,
411411
rows: &[usize],
412-
compare_results: &mut [bool],
412+
equal_to_results: &mut [bool],
413413
) {
414414
todo!()
415415
}
@@ -976,12 +976,12 @@ impl<B: ByteViewType> GroupColumn for ByteViewGroupValueBuilder<B> {
976976
self.append_val_inner(array, row)
977977
}
978978

979-
fn vectorized_compare(
980-
&mut self,
979+
fn vectorized_equal_to(
980+
&self,
981981
group_indices: &[usize],
982982
array: &ArrayRef,
983983
rows: &[usize],
984-
compare_results: &mut [bool],
984+
equal_to_results: &mut [bool],
985985
) {
986986
todo!()
987987
}

0 commit comments

Comments
 (0)