@@ -25,7 +25,7 @@ use arrow::array::{
25
25
use arrow:: buffer:: { Buffer , MutableBuffer , NullBuffer } ;
26
26
use arrow:: datatypes:: DataType ;
27
27
28
- use datafusion_common:: cast:: as_generic_string_array;
28
+ use datafusion_common:: cast:: { as_generic_string_array, as_string_view_array } ;
29
29
use datafusion_common:: Result ;
30
30
use datafusion_common:: { exec_err, ScalarValue } ;
31
31
use datafusion_expr:: ColumnarValue ;
@@ -49,6 +49,7 @@ impl Display for TrimType {
49
49
pub ( crate ) fn general_trim < T : OffsetSizeTrait > (
50
50
args : & [ ArrayRef ] ,
51
51
trim_type : TrimType ,
52
+ use_string_view : bool ,
52
53
) -> Result < ArrayRef > {
53
54
let func = match trim_type {
54
55
TrimType :: Left => |input, pattern : & str | {
@@ -68,6 +69,74 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
68
69
} ,
69
70
} ;
70
71
72
+ if use_string_view {
73
+ string_view_trim :: < T > ( trim_type, func, args)
74
+ } else {
75
+ string_trim :: < T > ( trim_type, func, args)
76
+ }
77
+ }
78
+
79
+ // removing 'a will cause compiler complaining lifetime of `func`
80
+ fn string_view_trim < ' a , T : OffsetSizeTrait > (
81
+ trim_type : TrimType ,
82
+ func : fn ( & ' a str , & ' a str ) -> & ' a str ,
83
+ args : & ' a [ ArrayRef ] ,
84
+ ) -> Result < ArrayRef > {
85
+ let string_array = as_string_view_array ( & args[ 0 ] ) ?;
86
+
87
+ match args. len ( ) {
88
+ 1 => {
89
+ let result = string_array
90
+ . iter ( )
91
+ . map ( |string| string. map ( |string : & str | func ( string, " " ) ) )
92
+ . collect :: < GenericStringArray < T > > ( ) ;
93
+
94
+ Ok ( Arc :: new ( result) as ArrayRef )
95
+ }
96
+ 2 => {
97
+ let characters_array = as_string_view_array ( & args[ 1 ] ) ?;
98
+
99
+ if characters_array. len ( ) == 1 {
100
+ if characters_array. is_null ( 0 ) {
101
+ return Ok ( new_null_array (
102
+ // The schema is expecting utf8 as null
103
+ & DataType :: Utf8 ,
104
+ string_array. len ( ) ,
105
+ ) ) ;
106
+ }
107
+
108
+ let characters = characters_array. value ( 0 ) ;
109
+ let result = string_array
110
+ . iter ( )
111
+ . map ( |item| item. map ( |string| func ( string, characters) ) )
112
+ . collect :: < GenericStringArray < T > > ( ) ;
113
+ return Ok ( Arc :: new ( result) as ArrayRef ) ;
114
+ }
115
+
116
+ let result = string_array
117
+ . iter ( )
118
+ . zip ( characters_array. iter ( ) )
119
+ . map ( |( string, characters) | match ( string, characters) {
120
+ ( Some ( string) , Some ( characters) ) => Some ( func ( string, characters) ) ,
121
+ _ => None ,
122
+ } )
123
+ . collect :: < GenericStringArray < T > > ( ) ;
124
+
125
+ Ok ( Arc :: new ( result) as ArrayRef )
126
+ }
127
+ other => {
128
+ exec_err ! (
129
+ "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2."
130
+ )
131
+ }
132
+ }
133
+ }
134
+
135
+ fn string_trim < ' a , T : OffsetSizeTrait > (
136
+ trim_type : TrimType ,
137
+ func : fn ( & ' a str , & ' a str ) -> & ' a str ,
138
+ args : & ' a [ ArrayRef ] ,
139
+ ) -> Result < ArrayRef > {
71
140
let string_array = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
72
141
73
142
match args. len ( ) {
@@ -84,7 +153,10 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
84
153
85
154
if characters_array. len ( ) == 1 {
86
155
if characters_array. is_null ( 0 ) {
87
- return Ok ( new_null_array ( args[ 0 ] . data_type ( ) , args[ 0 ] . len ( ) ) ) ;
156
+ return Ok ( new_null_array (
157
+ string_array. data_type ( ) ,
158
+ string_array. len ( ) ,
159
+ ) ) ;
88
160
}
89
161
90
162
let characters = characters_array. value ( 0 ) ;
@@ -109,7 +181,7 @@ pub(crate) fn general_trim<T: OffsetSizeTrait>(
109
181
other => {
110
182
exec_err ! (
111
183
"{trim_type} was called with {other} arguments. It requires at least 1 and at most 2."
112
- )
184
+ )
113
185
}
114
186
}
115
187
}
0 commit comments