Skip to content

Commit 80f4322

Browse files
authored
Minor: reuse Rows buffer in GroupValuesRows (#10980)
1 parent 2a49d61 commit 80f4322

File tree

1 file changed

+13
-3
lines changed
  • datafusion/physical-plan/src/aggregates/group_values

1 file changed

+13
-3
lines changed

datafusion/physical-plan/src/aggregates/group_values/row.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,12 @@ pub struct GroupValuesRows {
5959
/// [`Row`]: arrow::row::Row
6060
group_values: Option<Rows>,
6161

62-
// buffer to be reused to store hashes
62+
/// reused buffer to store hashes
6363
hashes_buffer: Vec<u64>,
6464

65+
/// reused buffer to store rows
66+
rows_buffer: Rows,
67+
6568
/// Random state for creating hashes
6669
random_state: RandomState,
6770
}
@@ -78,13 +81,18 @@ impl GroupValuesRows {
7881

7982
let map = RawTable::with_capacity(0);
8083

84+
let starting_rows_capacity = 1000;
85+
let starting_data_capacity = 64 * starting_rows_capacity;
86+
let rows_buffer =
87+
row_converter.empty_rows(starting_rows_capacity, starting_data_capacity);
8188
Ok(Self {
8289
schema,
8390
row_converter,
8491
map,
8592
map_size: 0,
8693
group_values: None,
8794
hashes_buffer: Default::default(),
95+
rows_buffer,
8896
random_state: Default::default(),
8997
})
9098
}
@@ -93,8 +101,9 @@ impl GroupValuesRows {
93101
impl GroupValues for GroupValuesRows {
94102
fn intern(&mut self, cols: &[ArrayRef], groups: &mut Vec<usize>) -> Result<()> {
95103
// Convert the group keys into the row format
96-
// Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available
97-
let group_rows = self.row_converter.convert_columns(cols)?;
104+
let group_rows = &mut self.rows_buffer;
105+
group_rows.clear();
106+
self.row_converter.append(group_rows, cols)?;
98107
let n_rows = group_rows.num_rows();
99108

100109
let mut group_values = match self.group_values.take() {
@@ -150,6 +159,7 @@ impl GroupValues for GroupValuesRows {
150159
self.row_converter.size()
151160
+ group_values_size
152161
+ self.map_size
162+
+ self.rows_buffer.size()
153163
+ self.hashes_buffer.allocated_size()
154164
}
155165

0 commit comments

Comments
 (0)