Skip to content

Unparser and recursion limit #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,9 @@ config_namespace! {
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
/// and recorded in the logical plan nodes.
pub collect_spans: bool, default = false

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50
}
}

Expand Down
18 changes: 16 additions & 2 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,12 +483,21 @@ impl SessionState {
MsSQL, ClickHouse, BigQuery, Ansi."
)
})?;
let mut statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?;

let recursion_limit = self.config.options().sql_parser.recursion_limit;

let mut statements = DFParser::parse_sql_with_dialect_limit(
sql,
dialect.as_ref(),
recursion_limit,
)?;

if statements.len() > 1 {
return not_impl_err!(
"The context currently only supports a single SQL statement"
);
}

let statement = statements.pop_front().ok_or_else(|| {
plan_datafusion_err!("No SQL statements were provided in the query string")
})?;
Expand Down Expand Up @@ -522,7 +531,12 @@ impl SessionState {
)
})?;

let expr = DFParser::parse_sql_into_expr_with_dialect(sql, dialect.as_ref())?;
let recursion_limit = self.config.options().sql_parser.recursion_limit;
let expr = DFParser::parse_sql_into_expr_with_dialect_limit(
sql,
dialect.as_ref(),
recursion_limit,
)?;

Ok(expr)
}
Expand Down
44 changes: 40 additions & 4 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), ParserError> {
Ok(())
}

/// Default parser recursion limit; same as the default used by `sqlparser`
const DEFAULT_RECURSION_LIMIT: usize = 50;

/// DataFusion SQL Parser based on [`sqlparser`]
///
/// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`].
Expand All @@ -282,20 +285,31 @@ impl<'a> DFParser<'a> {
pub fn new_with_dialect(
sql: &str,
dialect: &'a dyn Dialect,
) -> Result<Self, ParserError> {
DFParser::new_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
}
/// Create a new parser for the specified tokens with the
/// specified dialect and recursion limit
pub fn new_with_dialect_limit(
sql: &str,
dialect: &'a dyn Dialect,
recursion_limit: usize,
) -> Result<Self, ParserError> {
let mut tokenizer = Tokenizer::new(dialect, sql);
let tokens = tokenizer.tokenize_with_location()?;

Ok(DFParser {
parser: Parser::new(dialect).with_tokens_with_locations(tokens),
parser: Parser::new(dialect)
.with_tokens_with_locations(tokens)
.with_recursion_limit(recursion_limit),
})
}

/// Parse a SQL string into one or more [`Statement`]s using the
/// [`GenericDialect`].
pub fn parse_sql(sql: &str) -> Result<VecDeque<Statement>, ParserError> {
let dialect = &GenericDialect {};
DFParser::parse_sql_with_dialect(sql, dialect)
DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
}

/// Parse a SQL string and produce one or more [`Statement`]s with
Expand All @@ -304,7 +318,17 @@ impl<'a> DFParser<'a> {
sql: &str,
dialect: &dyn Dialect,
) -> Result<VecDeque<Statement>, ParserError> {
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
DFParser::parse_sql_with_dialect_limit(sql, dialect, DEFAULT_RECURSION_LIMIT)
}

/// Parse a SQL string and produce one or more [`Statement`]s
/// with the specified dialect and recursion limit
pub fn parse_sql_with_dialect_limit(
sql: &str,
dialect: &dyn Dialect,
recursion_limit: usize,
) -> Result<VecDeque<Statement>, ParserError> {
let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?;
let mut stmts = VecDeque::new();
let mut expecting_statement_delimiter = false;
loop {
Expand All @@ -331,7 +355,19 @@ impl<'a> DFParser<'a> {
sql: &str,
dialect: &dyn Dialect,
) -> Result<ExprWithAlias, ParserError> {
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
DFParser::parse_sql_into_expr_with_dialect_limit(
sql,
dialect,
DEFAULT_RECURSION_LIMIT,
)
}

/// Parse a SQL string into an [`ExprWithAlias`] using the specified
/// dialect and recursion limit.
///
/// Builds a [`DFParser`] via [`DFParser::new_with_dialect_limit`], passing
/// `recursion_limit` through, and returns the result of `parse_expr`.
///
/// # Errors
///
/// Returns a [`ParserError`] if the parser cannot be constructed or if
/// parsing the expression fails.
pub fn parse_sql_into_expr_with_dialect_limit(
sql: &str,
dialect: &dyn Dialect,
recursion_limit: usize,
) -> Result<ExprWithAlias, ParserError> {
let mut parser = DFParser::new_with_dialect_limit(sql, dialect, recursion_limit)?;
parser.parse_expr()
}

Expand Down
4 changes: 1 addition & 3 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1624,9 +1624,7 @@ impl Unparser<'_> {
DataType::Union(_, _) => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Dictionary(_, _) => {
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
}
DataType::Dictionary(_, val) => self.arrow_dtype_to_ast_dtype(val),
DataType::Decimal128(precision, scale)
| DataType::Decimal256(precision, scale) => {
let mut new_precision = *precision as u64;
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
datafusion.sql_parser.support_varchar_with_length true

# show all variables with verbose
Expand Down Expand Up @@ -357,6 +358,7 @@ datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusi
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

# show_variable_in_config_options
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |