@@ -59,9 +59,12 @@ pub struct GroupValuesRows {
59
59
/// [`Row`]: arrow::row::Row
60
60
group_values : Option < Rows > ,
61
61
62
- // buffer to be reused to store hashes
62
+ /// reused buffer to store hashes
63
63
hashes_buffer : Vec < u64 > ,
64
64
65
+ /// reused buffer to store rows
66
+ rows_buffer : Rows ,
67
+
65
68
/// Random state for creating hashes
66
69
random_state : RandomState ,
67
70
}
@@ -78,13 +81,18 @@ impl GroupValuesRows {
78
81
79
82
let map = RawTable :: with_capacity ( 0 ) ;
80
83
84
+ let starting_rows_capacity = 1000 ;
85
+ let starting_data_capacity = 64 * starting_rows_capacity;
86
+ let rows_buffer =
87
+ row_converter. empty_rows ( starting_rows_capacity, starting_data_capacity) ;
81
88
Ok ( Self {
82
89
schema,
83
90
row_converter,
84
91
map,
85
92
map_size : 0 ,
86
93
group_values : None ,
87
94
hashes_buffer : Default :: default ( ) ,
95
+ rows_buffer,
88
96
random_state : Default :: default ( ) ,
89
97
} )
90
98
}
@@ -93,8 +101,9 @@ impl GroupValuesRows {
93
101
impl GroupValues for GroupValuesRows {
94
102
fn intern ( & mut self , cols : & [ ArrayRef ] , groups : & mut Vec < usize > ) -> Result < ( ) > {
95
103
// Convert the group keys into the row format
96
- // Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available
97
- let group_rows = self . row_converter . convert_columns ( cols) ?;
104
+ let group_rows = & mut self . rows_buffer ;
105
+ group_rows. clear ( ) ;
106
+ self . row_converter . append ( group_rows, cols) ?;
98
107
let n_rows = group_rows. num_rows ( ) ;
99
108
100
109
let mut group_values = match self . group_values . take ( ) {
@@ -150,6 +159,7 @@ impl GroupValues for GroupValuesRows {
150
159
self . row_converter . size ( )
151
160
+ group_values_size
152
161
+ self . map_size
162
+ + self . rows_buffer . size ( )
153
163
+ self . hashes_buffer . allocated_size ( )
154
164
}
155
165
0 commit comments