Skip to content

Commit cf2b7e6

Browse files
Lordwormsalamb
andauthored
Do not unescape backslashes in datafusion-cli (apache#14844)
* correctly treat backslash in datafusion-cli * Remove all cli unescaping * remove unnessasary check * refine test --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 32dab3f commit cf2b7e6

File tree

4 files changed

+21
-62
lines changed

4 files changed

+21
-62
lines changed

datafusion-cli/src/exec.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::helper::split_from_semicolon;
2222
use crate::print_format::PrintFormat;
2323
use crate::{
2424
command::{Command, OutputFormat},
25-
helper::{unescape_input, CliHelper},
25+
helper::CliHelper,
2626
object_storage::get_object_store,
2727
print_options::{MaxRows, PrintOptions},
2828
};
@@ -168,7 +168,7 @@ pub async fn exec_from_repl(
168168
}
169169
}
170170
Ok(line) => {
171-
let lines = split_from_semicolon(line);
171+
let lines = split_from_semicolon(&line);
172172
for line in lines {
173173
rl.add_history_entry(line.trim_end())?;
174174
tokio::select! {
@@ -211,7 +211,6 @@ pub(super) async fn exec_and_print(
211211
sql: String,
212212
) -> Result<()> {
213213
let now = Instant::now();
214-
let sql = unescape_input(&sql)?;
215214
let task_ctx = ctx.task_ctx();
216215
let dialect = &task_ctx.session_config().options().sql_parser.dialect;
217216
let dialect = dialect_from_str(dialect).ok_or_else(|| {

datafusion-cli/src/helper.rs

+14-59
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,8 @@ use std::borrow::Cow;
2222

2323
use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter};
2424

25-
use datafusion::common::sql_datafusion_err;
26-
use datafusion::error::DataFusionError;
2725
use datafusion::sql::parser::{DFParser, Statement};
2826
use datafusion::sql::sqlparser::dialect::dialect_from_str;
29-
use datafusion::sql::sqlparser::parser::ParserError;
3027

3128
use rustyline::completion::{Completer, FilenameCompleter, Pair};
3229
use rustyline::error::ReadlineError;
@@ -63,15 +60,6 @@ impl CliHelper {
6360

6461
fn validate_input(&self, input: &str) -> Result<ValidationResult> {
6562
if let Some(sql) = input.strip_suffix(';') {
66-
let sql = match unescape_input(sql) {
67-
Ok(sql) => sql,
68-
Err(err) => {
69-
return Ok(ValidationResult::Invalid(Some(format!(
70-
" 🤔 Invalid statement: {err}",
71-
))))
72-
}
73-
};
74-
7563
let dialect = match dialect_from_str(&self.dialect) {
7664
Some(dialect) => dialect,
7765
None => {
@@ -166,40 +154,8 @@ impl Validator for CliHelper {
166154

167155
impl Helper for CliHelper {}
168156

169-
/// Unescape input string from readline.
170-
///
171-
/// The data read from stdio will be escaped, so we need to unescape the input before executing the input
172-
pub fn unescape_input(input: &str) -> datafusion::error::Result<String> {
173-
let mut chars = input.chars();
174-
175-
let mut result = String::with_capacity(input.len());
176-
while let Some(char) = chars.next() {
177-
if char == '\\' {
178-
if let Some(next_char) = chars.next() {
179-
// https://static.rust-lang.org/doc/master/reference.html#literals
180-
result.push(match next_char {
181-
'0' => '\0',
182-
'n' => '\n',
183-
'r' => '\r',
184-
't' => '\t',
185-
'\\' => '\\',
186-
_ => {
187-
return Err(sql_datafusion_err!(ParserError::TokenizerError(
188-
format!("unsupported escape char: '\\{}'", next_char)
189-
)))
190-
}
191-
});
192-
}
193-
} else {
194-
result.push(char);
195-
}
196-
}
197-
198-
Ok(result)
199-
}
200-
201157
/// Splits a string which consists of multiple queries.
202-
pub(crate) fn split_from_semicolon(sql: String) -> Vec<String> {
158+
pub(crate) fn split_from_semicolon(sql: &str) -> Vec<String> {
203159
let mut commands = Vec::new();
204160
let mut current_command = String::new();
205161
let mut in_single_quote = false;
@@ -310,14 +266,13 @@ mod tests {
310266
)?;
311267
assert!(matches!(result, ValidationResult::Valid(None)));
312268

313-
// should be invalid
314269
let result = readline_direct(
315-
Cursor::new(
316-
r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');"
317-
.as_bytes()),
318-
&validator,
319-
)?;
320-
assert!(matches!(result, ValidationResult::Invalid(Some(_))));
270+
Cursor::new(
271+
r"select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';".as_bytes(),
272+
),
273+
&validator,
274+
)?;
275+
assert!(matches!(result, ValidationResult::Valid(None)));
321276

322277
Ok(())
323278
}
@@ -346,34 +301,34 @@ mod tests {
346301
fn test_split_from_semicolon() {
347302
let sql = "SELECT 1; SELECT 2;";
348303
let expected = vec!["SELECT 1;", "SELECT 2;"];
349-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
304+
assert_eq!(split_from_semicolon(sql), expected);
350305

351306
let sql = r#"SELECT ";";"#;
352307
let expected = vec![r#"SELECT ";";"#];
353-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
308+
assert_eq!(split_from_semicolon(sql), expected);
354309

355310
let sql = "SELECT ';';";
356311
let expected = vec!["SELECT ';';"];
357-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
312+
assert_eq!(split_from_semicolon(sql), expected);
358313

359314
let sql = r#"SELECT 1; SELECT 'value;value'; SELECT 1 as "text;text";"#;
360315
let expected = vec![
361316
"SELECT 1;",
362317
"SELECT 'value;value';",
363318
r#"SELECT 1 as "text;text";"#,
364319
];
365-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
320+
assert_eq!(split_from_semicolon(sql), expected);
366321

367322
let sql = "";
368323
let expected: Vec<String> = Vec::new();
369-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
324+
assert_eq!(split_from_semicolon(sql), expected);
370325

371326
let sql = "SELECT 1";
372327
let expected = vec!["SELECT 1;"];
373-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
328+
assert_eq!(split_from_semicolon(sql), expected);
374329

375330
let sql = "SELECT 1; ";
376331
let expected = vec!["SELECT 1;"];
377-
assert_eq!(split_from_semicolon(sql.to_string()), expected);
332+
assert_eq!(split_from_semicolon(sql), expected);
378333
}
379334
}

datafusion-cli/tests/cli_integration.rs

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ fn init() {
3939
["--command", "select 1; select 2;", "--format", "json", "-q"],
4040
"[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n"
4141
)]
42+
#[case::exec_backslash(
43+
["--file", "tests/data/backslash.txt", "--format", "json", "-q"],
44+
"[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\\\t\\\")\":\"\\\\t\",\"Utf8(\\\"\\\\0\\\")\":\"\\\\0\",\"Utf8(\\\"\\\\n\\\")\":\"\\\\n\"}]\n"
45+
)]
4246
#[case::exec_from_files(
4347
["--file", "tests/data/sql.txt", "--format", "json", "-q"],
4448
"[{\"Int64(1)\":1}]\n"
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';

0 commit comments

Comments
 (0)