@@ -32,11 +32,11 @@ use arrow::datatypes::GenericBinaryType;
32
32
use arrow:: datatypes:: GenericStringType ;
33
33
use datafusion_common:: utils:: proxy:: VecAllocExt ;
34
34
35
+ use crate :: aggregates:: group_values:: null_builder:: MaybeNullBufferBuilder ;
36
+ use datafusion_physical_expr_common:: binary_map:: { OutputType , INITIAL_BUFFER_CAPACITY } ;
35
37
use std:: sync:: Arc ;
36
38
use std:: vec;
37
39
38
- use datafusion_physical_expr_common:: binary_map:: { OutputType , INITIAL_BUFFER_CAPACITY } ;
39
-
40
40
/// Trait for storing a single column of group values in [`GroupValuesColumn`]
41
41
///
42
42
/// Implementations of this trait store an in-progress collection of group values
@@ -47,6 +47,8 @@ use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAP
47
47
pub trait GroupColumn : Send + Sync {
48
48
/// Returns equal if the row stored in this builder at `lhs_row` is equal to
49
49
/// the row in `array` at `rhs_row`
50
+ ///
51
+ /// Note that this comparison returns true if both elements are NULL
50
52
fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool ;
51
53
/// Appends the row at `row` in `array` to this builder
52
54
fn append_val ( & mut self , array : & ArrayRef , row : usize ) ;
@@ -61,61 +63,96 @@ pub trait GroupColumn: Send + Sync {
61
63
fn take_n ( & mut self , n : usize ) -> ArrayRef ;
62
64
}
63
65
64
- /// An implementation of [`GroupColumn`] for primitive types.
65
- pub struct PrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
66
+ /// An implementation of [`GroupColumn`] for primitive values which are known to have no nulls
67
+ #[ derive( Debug ) ]
68
+ pub struct NonNullPrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
66
69
group_values : Vec < T :: Native > ,
67
- nulls : Vec < bool > ,
68
- /// whether the array contains at least one null, for fast non-null path
69
- has_null : bool ,
70
- /// Can the input array contain nulls?
71
- nullable : bool ,
72
70
}
73
71
74
- impl < T > PrimitiveGroupValueBuilder < T >
72
+ impl < T > NonNullPrimitiveGroupValueBuilder < T >
75
73
where
76
74
T : ArrowPrimitiveType ,
77
75
{
78
- pub fn new ( nullable : bool ) -> Self {
76
+ pub fn new ( ) -> Self {
79
77
Self {
80
78
group_values : vec ! [ ] ,
81
- nulls : vec ! [ ] ,
82
- has_null : false ,
83
- nullable,
84
79
}
85
80
}
86
81
}
87
82
88
- impl < T : ArrowPrimitiveType > GroupColumn for PrimitiveGroupValueBuilder < T > {
83
+ impl < T : ArrowPrimitiveType > GroupColumn for NonNullPrimitiveGroupValueBuilder < T > {
89
84
fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool {
90
- // non-null fast path
91
- // both non-null
92
- if !self . nullable {
93
- return self . group_values [ lhs_row]
94
- == array. as_primitive :: < T > ( ) . value ( rhs_row) ;
95
- }
85
+ // know input has no nulls
86
+ self . group_values [ lhs_row] == array. as_primitive :: < T > ( ) . value ( rhs_row)
87
+ }
96
88
97
- // lhs is non-null
98
- if self . nulls [ lhs_row] {
99
- if array. is_null ( rhs_row) {
100
- return false ;
101
- }
89
+ fn append_val ( & mut self , array : & ArrayRef , row : usize ) {
90
+ // input can't possibly have nulls, so don't worry about them
91
+ self . group_values . push ( array. as_primitive :: < T > ( ) . value ( row) )
92
+ }
93
+
94
+ fn len ( & self ) -> usize {
95
+ self . group_values . len ( )
96
+ }
97
+
98
+ fn size ( & self ) -> usize {
99
+ self . group_values . allocated_size ( )
100
+ }
101
+
102
+ fn build ( self : Box < Self > ) -> ArrayRef {
103
+ let Self { group_values } = * self ;
102
104
103
- return self . group_values [ lhs_row]
104
- == array. as_primitive :: < T > ( ) . value ( rhs_row) ;
105
+ let nulls = None ;
106
+
107
+ Arc :: new ( PrimitiveArray :: < T > :: new (
108
+ ScalarBuffer :: from ( group_values) ,
109
+ nulls,
110
+ ) )
111
+ }
112
+
113
+ fn take_n ( & mut self , n : usize ) -> ArrayRef {
114
+ let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
115
+ let first_n_nulls = None ;
116
+
117
+ Arc :: new ( PrimitiveArray :: < T > :: new (
118
+ ScalarBuffer :: from ( first_n) ,
119
+ first_n_nulls,
120
+ ) )
121
+ }
122
+ }
123
+
124
+ /// An implementation of [`GroupColumn`] for primitive values which may have nulls
125
+ #[ derive( Debug ) ]
126
+ pub struct PrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
127
+ group_values : Vec < T :: Native > ,
128
+ nulls : MaybeNullBufferBuilder ,
129
+ }
130
+
131
+ impl < T > PrimitiveGroupValueBuilder < T >
132
+ where
133
+ T : ArrowPrimitiveType ,
134
+ {
135
+ pub fn new ( ) -> Self {
136
+ Self {
137
+ group_values : vec ! [ ] ,
138
+ nulls : MaybeNullBufferBuilder :: new ( ) ,
105
139
}
140
+ }
141
+ }
106
142
107
- array. is_null ( rhs_row)
143
+ impl < T : ArrowPrimitiveType > GroupColumn for PrimitiveGroupValueBuilder < T > {
144
+ fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool {
145
+ self . nulls . is_null ( lhs_row) == array. is_null ( rhs_row)
146
+ && self . group_values [ lhs_row] == array. as_primitive :: < T > ( ) . value ( rhs_row)
108
147
}
109
148
110
149
fn append_val ( & mut self , array : & ArrayRef , row : usize ) {
111
- if self . nullable && array. is_null ( row) {
150
+ if array. is_null ( row) {
151
+ self . nulls . append ( true ) ;
112
152
self . group_values . push ( T :: default_value ( ) ) ;
113
- self . nulls . push ( false ) ;
114
- self . has_null = true ;
115
153
} else {
116
- let elem = array. as_primitive :: < T > ( ) . value ( row) ;
117
- self . group_values . push ( elem) ;
118
- self . nulls . push ( true ) ;
154
+ self . nulls . append ( false ) ;
155
+ self . group_values . push ( array. as_primitive :: < T > ( ) . value ( row) ) ;
119
156
}
120
157
}
121
158
@@ -128,32 +165,27 @@ impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
128
165
}
129
166
130
167
fn build ( self : Box < Self > ) -> ArrayRef {
131
- if self . has_null {
132
- Arc :: new ( PrimitiveArray :: < T > :: new (
133
- ScalarBuffer :: from ( self . group_values ) ,
134
- Some ( NullBuffer :: from ( self . nulls ) ) ,
135
- ) )
136
- } else {
137
- Arc :: new ( PrimitiveArray :: < T > :: new (
138
- ScalarBuffer :: from ( self . group_values ) ,
139
- None ,
140
- ) )
141
- }
168
+ let Self {
169
+ group_values ,
170
+ nulls ,
171
+ } = * self ;
172
+
173
+ let nulls = nulls . build ( ) ;
174
+
175
+ Arc :: new ( PrimitiveArray :: < T > :: new (
176
+ ScalarBuffer :: from ( group_values ) ,
177
+ nulls ,
178
+ ) )
142
179
}
143
180
144
181
fn take_n ( & mut self , n : usize ) -> ArrayRef {
145
- if self . has_null {
146
- let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
147
- let first_n_nulls = self . nulls . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
148
- Arc :: new ( PrimitiveArray :: < T > :: new (
149
- ScalarBuffer :: from ( first_n) ,
150
- Some ( NullBuffer :: from ( first_n_nulls) ) ,
151
- ) )
152
- } else {
153
- let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
154
- self . nulls . truncate ( self . nulls . len ( ) - n) ;
155
- Arc :: new ( PrimitiveArray :: < T > :: new ( ScalarBuffer :: from ( first_n) , None ) )
156
- }
182
+ let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
183
+ let first_n_nulls = self . nulls . take_n ( n) ;
184
+
185
+ Arc :: new ( PrimitiveArray :: < T > :: new (
186
+ ScalarBuffer :: from ( first_n) ,
187
+ first_n_nulls,
188
+ ) )
157
189
}
158
190
}
159
191
0 commit comments