18
18
//! DataFusion SQL Parser based on [`sqlparser`]
19
19
20
20
use datafusion_common:: parsers:: CompressionTypeVariant ;
21
- use sqlparser:: ast:: OrderByExpr ;
21
+ use sqlparser:: ast:: { OrderByExpr , Query , Value } ;
22
22
use sqlparser:: {
23
23
ast:: {
24
24
ColumnDef , ColumnOptionDef , ObjectName , Statement as SQLStatement ,
@@ -42,6 +42,46 @@ fn parse_file_type(s: &str) -> Result<String, ParserError> {
42
42
Ok ( s. to_uppercase ( ) )
43
43
}
44
44
45
+ /// DataFusion extension DDL for `COPY`
46
+ ///
47
+ /// Syntax:
48
+ ///
49
+ /// ```text
50
+ /// COPY <table_name | (<query>)>
51
+ /// TO
52
+ /// <destination_url>
53
+ /// (key_value_list)
54
+ ///
55
+ /// ```
56
+ /// Examples
57
+ /// ``sql
58
+ /// COPY lineitem TO 'lineitem'
59
+ /// (format parquet,
60
+ /// partitions 16,
61
+ /// row_group_limit_rows 100000,
62
+ // row_group_limit_bytes
63
+ /// )
64
+ ///
65
+ /// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet';
66
+ /// ```
67
+ #[ derive( Debug , Clone , PartialEq , Eq ) ]
68
+ pub struct CopyToStatement {
69
+ /// From where the data comes from
70
+ pub source : CopyToSource ,
71
+ /// The URL to where the data is heading
72
+ pub target : String ,
73
+ /// Target specific options
74
+ pub options : HashMap < String , Value > ,
75
+ }
76
+
77
+ #[ derive( Debug , Clone , PartialEq , Eq ) ]
78
+ pub enum CopyToSource {
79
+ /// `COPY <table> TO ...`
80
+ Relation ( ObjectName ) ,
81
+ /// COPY (query...) TO ...
82
+ Query ( Query ) ,
83
+ }
84
+
45
85
/// DataFusion extension DDL for `CREATE EXTERNAL TABLE`
46
86
///
47
87
/// Syntax:
@@ -117,12 +157,14 @@ pub struct DescribeTableStmt {
117
157
/// Tokens parsed by [`DFParser`] are converted into these values.
118
158
#[ derive( Debug , Clone , PartialEq , Eq ) ]
119
159
pub enum Statement {
120
- /// ANSI SQL AST node
160
+ /// ANSI SQL AST node (from sqlparser-rs)
121
161
Statement ( Box < SQLStatement > ) ,
122
162
/// Extension: `CREATE EXTERNAL TABLE`
123
163
CreateExternalTable ( CreateExternalTable ) ,
124
164
/// Extension: `DESCRIBE TABLE`
125
165
DescribeTableStmt ( DescribeTableStmt ) ,
166
+ /// Extension: `COPY TO`
167
+ CopyTo ( CopyToStatement ) ,
126
168
}
127
169
128
170
/// DataFusion SQL Parser based on [`sqlparser`]
@@ -211,6 +253,11 @@ impl<'a> DFParser<'a> {
211
253
// use custom parsing
212
254
self . parse_create ( )
213
255
}
256
+ Keyword :: COPY => {
257
+ // move one token forward
258
+ self . parser . next_token ( ) ;
259
+ self . parse_copy ( )
260
+ }
214
261
Keyword :: DESCRIBE => {
215
262
// move one token forward
216
263
self . parser . next_token ( ) ;
@@ -242,6 +289,37 @@ impl<'a> DFParser<'a> {
242
289
} ) )
243
290
}
244
291
292
+ /// Parse a SQL `COPY TO` statement
293
+ pub fn parse_copy ( & mut self ) -> Result < Statement , ParserError > {
294
+ // parse as a query
295
+ let source = if self . parser . consume_token ( & Token :: LParen ) {
296
+ let query = self . parser . parse_query ( ) ?;
297
+ self . parser . expect_token ( & Token :: RParen ) ?;
298
+ CopyToSource :: Query ( query)
299
+ } else {
300
+ // parse as table reference
301
+ let table_name = self . parser . parse_object_name ( ) ?;
302
+ CopyToSource :: Relation ( table_name)
303
+ } ;
304
+
305
+ self . parser . expect_keyword ( Keyword :: TO ) ?;
306
+
307
+ let target = self . parser . parse_literal_string ( ) ?;
308
+
309
+ // check for options in parens
310
+ let options = if self . parser . peek_token ( ) . token == Token :: LParen {
311
+ self . parse_value_options ( ) ?
312
+ } else {
313
+ HashMap :: new ( )
314
+ } ;
315
+
316
+ Ok ( Statement :: CopyTo ( CopyToStatement {
317
+ source,
318
+ target,
319
+ options,
320
+ } ) )
321
+ }
322
+
245
323
/// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE`
246
324
pub fn parse_create ( & mut self ) -> Result < Statement , ParserError > {
247
325
if self . parser . parse_keyword ( Keyword :: EXTERNAL ) {
@@ -457,7 +535,7 @@ impl<'a> DFParser<'a> {
457
535
builder. table_partition_cols = Some ( self . parse_partitions ( ) ?)
458
536
} else if self . parser . parse_keyword ( Keyword :: OPTIONS ) {
459
537
ensure_not_set ( & builder. options , "OPTIONS" ) ?;
460
- builder. options = Some ( self . parse_options ( ) ?) ;
538
+ builder. options = Some ( self . parse_string_options ( ) ?) ;
461
539
} else {
462
540
break ;
463
541
}
@@ -513,14 +591,40 @@ impl<'a> DFParser<'a> {
513
591
}
514
592
}
515
593
516
- fn parse_options ( & mut self ) -> Result < HashMap < String , String > , ParserError > {
517
- let mut options: HashMap < String , String > = HashMap :: new ( ) ;
594
+ /// Parses (key value) style options, but values can only be literal strings
595
+ /// TODO maybe change this to be real expressions rather than just strings
596
+ /// the reason
597
+ fn parse_string_options ( & mut self ) -> Result < HashMap < String , String > , ParserError > {
598
+ let mut options = HashMap :: new ( ) ;
518
599
self . parser . expect_token ( & Token :: LParen ) ?;
519
600
520
601
loop {
521
602
let key = self . parser . parse_literal_string ( ) ?;
522
603
let value = self . parser . parse_literal_string ( ) ?;
523
- options. insert ( key. to_string ( ) , value. to_string ( ) ) ;
604
+ options. insert ( key, value) ;
605
+ let comma = self . parser . consume_token ( & Token :: Comma ) ;
606
+ if self . parser . consume_token ( & Token :: RParen ) {
607
+ // allow a trailing comma, even though it's not in standard
608
+ break ;
609
+ } else if !comma {
610
+ return self . expected (
611
+ "',' or ')' after option definition" ,
612
+ self . parser . peek_token ( ) ,
613
+ ) ;
614
+ }
615
+ }
616
+ Ok ( options)
617
+ }
618
+
619
+ /// parses (foo bar) style options into a map of String --> [`Value`]
620
+ fn parse_value_options ( & mut self ) -> Result < HashMap < String , Value > , ParserError > {
621
+ let mut options = HashMap :: new ( ) ;
622
+ self . parser . expect_token ( & Token :: LParen ) ?;
623
+
624
+ loop {
625
+ let key = self . parser . parse_literal_string ( ) ?;
626
+ let value = self . parser . parse_value ( ) ?;
627
+ options. insert ( key, value) ;
524
628
let comma = self . parser . consume_token ( & Token :: Comma ) ;
525
629
if self . parser . consume_token ( & Token :: RParen ) {
526
630
// allow a trailing comma, even though it's not in standard
@@ -560,7 +664,7 @@ mod tests {
560
664
1 ,
561
665
"Expected to parse exactly one statement"
562
666
) ;
563
- assert_eq ! ( statements[ 0 ] , expected) ;
667
+ assert_eq ! ( statements[ 0 ] , expected, "actual: \n {:#?}" , statements [ 0 ] ) ;
564
668
Ok ( ( ) )
565
669
}
566
670
@@ -980,4 +1084,60 @@ mod tests {
980
1084
981
1085
Ok ( ( ) )
982
1086
}
1087
+
1088
#[test]
fn copy_to_table_to_table() -> Result<(), ParserError> {
    // positive case: table source, bare target, no options
    let expected = Statement::CopyTo(CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        options: HashMap::new(),
    });

    expect_parse_ok("COPY foo TO bar", expected)?;
    Ok(())
}
1101
+
1102
#[test]
fn copy_to_query_to_table() -> Result<(), ParserError> {
    // build the expected inner query by parsing it with the plain sqlparser parser
    let mut statements = Parser::parse_sql(&GenericDialect {}, "select 1")?;
    assert_eq!(statements.len(), 1);
    let query = match statements.pop().unwrap() {
        SQLStatement::Query(query) => *query,
        statement => panic!("Expected query, got {statement:?}"),
    };

    let expected = Statement::CopyTo(CopyToStatement {
        source: CopyToSource::Query(query),
        target: String::from("bar"),
        options: HashMap::new(),
    });

    expect_parse_ok("COPY (select 1) TO bar", expected)?;
    Ok(())
}
1122
+
1123
#[test]
fn copy_to_options() -> Result<(), ParserError> {
    // a single numeric option is expected to come back as a `Value::Number`
    let mut options = HashMap::new();
    options.insert(
        String::from("row_group_size"),
        Value::Number(String::from("55"), false),
    );

    let expected = Statement::CopyTo(CopyToStatement {
        source: object_name("foo"),
        target: String::from("bar"),
        options,
    });

    expect_parse_ok("COPY foo TO bar (row_group_size 55)", expected)?;
    Ok(())
}
1137
+
1138
+ // For error cases, see: `copy.slt`
1139
+
1140
+ fn object_name ( name : & str ) -> CopyToSource {
1141
+ CopyToSource :: Relation ( ObjectName ( vec ! [ Ident :: new( name) ] ) )
1142
+ }
983
1143
}
0 commit comments