Skip to content

Config: Add support default sql varchar to view types #15104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,11 @@ config_namespace! {
/// string length and thus DataFusion can not enforce such limits.
pub support_varchar_with_length: bool, default = true

/// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
/// If false, `VARCHAR` is mapped to `Utf8` during SQL planning.
/// Default is false.
pub map_varchar_to_utf8view: bool, default = false

/// When set to true, the source locations relative to the original SQL
/// query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected
/// and recorded in the logical plan nodes.
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,7 @@ impl SessionState {
enable_options_value_normalization: sql_parser_options
.enable_options_value_normalization,
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
collect_spans: sql_parser_options.collect_spans,
}
}
Expand Down
18 changes: 17 additions & 1 deletion datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ pub struct ParserOptions {
pub enable_options_value_normalization: bool,
/// Whether to collect spans
pub collect_spans: bool,
/// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
pub map_varchar_to_utf8view: bool,
}

impl ParserOptions {
Expand All @@ -72,6 +74,7 @@ impl ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization: true,
support_varchar_with_length: true,
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
}
Expand Down Expand Up @@ -111,6 +114,12 @@ impl ParserOptions {
self
}

/// Sets the `map_varchar_to_utf8view` option.
pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self {
self.map_varchar_to_utf8view = value;
self
}

/// Sets the `enable_options_value_normalization` option.
pub fn with_enable_options_value_normalization(mut self, value: bool) -> Self {
self.enable_options_value_normalization = value;
Expand All @@ -136,6 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions {
parse_float_as_decimal: options.parse_float_as_decimal,
enable_ident_normalization: options.enable_ident_normalization,
support_varchar_with_length: options.support_varchar_with_length,
map_varchar_to_utf8view: options.map_varchar_to_utf8view,
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
Expand Down Expand Up @@ -558,7 +568,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::Varchar(length) => {
match (length, self.options.support_varchar_with_length) {
(Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"),
_ => Ok(DataType::Utf8),
_ => {
if self.options.map_varchar_to_utf8view {
Ok(DataType::Utf8View)
} else {
Ok(DataType::Utf8)
}
}
}
}
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),
Expand Down
2 changes: 2 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ fn parse_decimals() {
parse_float_as_decimal: true,
enable_ident_normalization: false,
support_varchar_with_length: false,
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
},
Expand Down Expand Up @@ -148,6 +149,7 @@ fn parse_ident_normalization() {
parse_float_as_decimal: false,
enable_ident_normalization,
support_varchar_with_length: false,
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
},
Expand Down
28 changes: 28 additions & 0 deletions datafusion/sqllogictest/test_files/ddl.slt
Original file line number Diff line number Diff line change
Expand Up @@ -827,3 +827,31 @@ drop table table_with_pk;

statement ok
set datafusion.catalog.information_schema = false;

# Test VARCHAR is mapped to Utf8View during SQL planning when setting map_varchar_to_utf8view to true
statement ok
CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);

query TTT
DESCRIBE t1;
----
c1 Utf8 NO
c2 Utf8 YES

statement ok
set datafusion.sql_parser.map_varchar_to_utf8view = true;

statement ok
CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);

query TTT
DESCRIBE t2;
----
c1 Utf8View NO
c2 Utf8View YES

statement ok
DROP TABLE t1;

statement ok
DROP TABLE t2;
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ datafusion.sql_parser.collect_spans false
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.map_varchar_to_utf8view false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
datafusion.sql_parser.support_varchar_with_length true
Expand Down Expand Up @@ -361,6 +362,7 @@ datafusion.sql_parser.collect_spans false When set to true, the source locations
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit when parsing complex SQL Queries
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
| datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |