Skip to content

Commit 99bf509

Browse files
Bugfix: Remove df-cli-specific SQL statement options before executing with DataFusion (#8426)
* remove df-cli specific options from create external table options
* add test and comments
* cargo fmt
* merge main
* cargo toml format
1 parent d9d8ddd commit 99bf509

File tree

4 files changed

+58
-34
lines changed

4 files changed

+58
-34
lines changed

datafusion-cli/Cargo.lock

Lines changed: 10 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,6 @@ url = "2.2"
4848
[dev-dependencies]
4949
assert_cmd = "2.0"
5050
ctor = "0.2.0"
51+
datafusion-common = { path = "../datafusion/common" }
5152
predicates = "3.0"
5253
rstest = "0.17"

datafusion-cli/src/exec.rs

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ async fn exec_and_print(
211211
})?;
212212
let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?;
213213
for statement in statements {
214-
let plan = ctx.state().statement_to_plan(statement).await?;
214+
let mut plan = ctx.state().statement_to_plan(statement).await?;
215215

216216
// For plans like `Explain` ignore `MaxRows` option and always display all rows
217217
let should_ignore_maxrows = matches!(
@@ -221,10 +221,12 @@ async fn exec_and_print(
221221
| LogicalPlan::Analyze(_)
222222
);
223223

224-
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
224+
// Note that cmd is a mutable reference so that create_external_table function can remove all
225+
// datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion
226+
// will raise Configuration errors.
227+
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
225228
create_external_table(ctx, cmd).await?;
226229
}
227-
228230
let df = ctx.execute_logical_plan(plan).await?;
229231
let results = df.collect().await?;
230232

@@ -244,7 +246,7 @@ async fn exec_and_print(
244246

245247
async fn create_external_table(
246248
ctx: &SessionContext,
247-
cmd: &CreateExternalTable,
249+
cmd: &mut CreateExternalTable,
248250
) -> Result<()> {
249251
let table_path = ListingTableUrl::parse(&cmd.location)?;
250252
let scheme = table_path.scheme();
@@ -285,15 +287,32 @@ async fn create_external_table(
285287

286288
#[cfg(test)]
287289
mod tests {
290+
use std::str::FromStr;
291+
288292
use super::*;
289293
use datafusion::common::plan_err;
294+
use datafusion_common::{file_options::StatementOptions, FileTypeWriterOptions};
290295

291296
async fn create_external_table_test(location: &str, sql: &str) -> Result<()> {
292297
let ctx = SessionContext::new();
293-
let plan = ctx.state().create_logical_plan(sql).await?;
298+
let mut plan = ctx.state().create_logical_plan(sql).await?;
294299

295-
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
300+
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
296301
create_external_table(&ctx, cmd).await?;
302+
let options: Vec<_> = cmd
303+
.options
304+
.iter()
305+
.map(|(k, v)| (k.clone(), v.clone()))
306+
.collect();
307+
let statement_options = StatementOptions::new(options);
308+
let file_type =
309+
datafusion_common::FileType::from_str(cmd.file_type.as_str())?;
310+
311+
let _file_type_writer_options = FileTypeWriterOptions::build(
312+
&file_type,
313+
ctx.state().config_options(),
314+
&statement_options,
315+
)?;
297316
} else {
298317
return plan_err!("LogicalPlan is not a CreateExternalTable");
299318
}

datafusion-cli/src/object_storage.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,23 @@ use url::Url;
3030

3131
pub async fn get_s3_object_store_builder(
3232
url: &Url,
33-
cmd: &CreateExternalTable,
33+
cmd: &mut CreateExternalTable,
3434
) -> Result<AmazonS3Builder> {
3535
let bucket_name = get_bucket_name(url)?;
3636
let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name);
3737

3838
if let (Some(access_key_id), Some(secret_access_key)) = (
39-
cmd.options.get("access_key_id"),
40-
cmd.options.get("secret_access_key"),
39+
// These options are datafusion-cli specific and must be removed before passing through to datafusion.
40+
// Otherwise, a Configuration error will be raised.
41+
cmd.options.remove("access_key_id"),
42+
cmd.options.remove("secret_access_key"),
4143
) {
44+
println!("removing secret access key!");
4245
builder = builder
4346
.with_access_key_id(access_key_id)
4447
.with_secret_access_key(secret_access_key);
4548

46-
if let Some(session_token) = cmd.options.get("session_token") {
49+
if let Some(session_token) = cmd.options.remove("session_token") {
4750
builder = builder.with_token(session_token);
4851
}
4952
} else {
@@ -66,7 +69,7 @@ pub async fn get_s3_object_store_builder(
6669
builder = builder.with_credentials(credentials);
6770
}
6871

69-
if let Some(region) = cmd.options.get("region") {
72+
if let Some(region) = cmd.options.remove("region") {
7073
builder = builder.with_region(region);
7174
}
7275

@@ -99,7 +102,7 @@ impl CredentialProvider for S3CredentialProvider {
99102

100103
pub fn get_oss_object_store_builder(
101104
url: &Url,
102-
cmd: &CreateExternalTable,
105+
cmd: &mut CreateExternalTable,
103106
) -> Result<AmazonS3Builder> {
104107
let bucket_name = get_bucket_name(url)?;
105108
let mut builder = AmazonS3Builder::from_env()
@@ -109,15 +112,15 @@ pub fn get_oss_object_store_builder(
109112
.with_region("do_not_care");
110113

111114
if let (Some(access_key_id), Some(secret_access_key)) = (
112-
cmd.options.get("access_key_id"),
113-
cmd.options.get("secret_access_key"),
115+
cmd.options.remove("access_key_id"),
116+
cmd.options.remove("secret_access_key"),
114117
) {
115118
builder = builder
116119
.with_access_key_id(access_key_id)
117120
.with_secret_access_key(secret_access_key);
118121
}
119122

120-
if let Some(endpoint) = cmd.options.get("endpoint") {
123+
if let Some(endpoint) = cmd.options.remove("endpoint") {
121124
builder = builder.with_endpoint(endpoint);
122125
}
123126

@@ -126,21 +129,21 @@ pub fn get_oss_object_store_builder(
126129

127130
pub fn get_gcs_object_store_builder(
128131
url: &Url,
129-
cmd: &CreateExternalTable,
132+
cmd: &mut CreateExternalTable,
130133
) -> Result<GoogleCloudStorageBuilder> {
131134
let bucket_name = get_bucket_name(url)?;
132135
let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket_name);
133136

134-
if let Some(service_account_path) = cmd.options.get("service_account_path") {
137+
if let Some(service_account_path) = cmd.options.remove("service_account_path") {
135138
builder = builder.with_service_account_path(service_account_path);
136139
}
137140

138-
if let Some(service_account_key) = cmd.options.get("service_account_key") {
141+
if let Some(service_account_key) = cmd.options.remove("service_account_key") {
139142
builder = builder.with_service_account_key(service_account_key);
140143
}
141144

142145
if let Some(application_credentials_path) =
143-
cmd.options.get("application_credentials_path")
146+
cmd.options.remove("application_credentials_path")
144147
{
145148
builder = builder.with_application_credentials(application_credentials_path);
146149
}
@@ -180,9 +183,9 @@ mod tests {
180183
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'region' '{region}', 'session_token' {session_token}) LOCATION '{location}'");
181184

182185
let ctx = SessionContext::new();
183-
let plan = ctx.state().create_logical_plan(&sql).await?;
186+
let mut plan = ctx.state().create_logical_plan(&sql).await?;
184187

185-
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
188+
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
186189
let builder = get_s3_object_store_builder(table_url.as_ref(), cmd).await?;
187190
// get the actual configuration information, then assert_eq!
188191
let config = [
@@ -212,9 +215,9 @@ mod tests {
212215
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'endpoint' '{endpoint}') LOCATION '{location}'");
213216

214217
let ctx = SessionContext::new();
215-
let plan = ctx.state().create_logical_plan(&sql).await?;
218+
let mut plan = ctx.state().create_logical_plan(&sql).await?;
216219

217-
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
220+
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
218221
let builder = get_oss_object_store_builder(table_url.as_ref(), cmd)?;
219222
// get the actual configuration information, then assert_eq!
220223
let config = [
@@ -244,9 +247,9 @@ mod tests {
244247
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('service_account_path' '{service_account_path}', 'service_account_key' '{service_account_key}', 'application_credentials_path' '{application_credentials_path}') LOCATION '{location}'");
245248

246249
let ctx = SessionContext::new();
247-
let plan = ctx.state().create_logical_plan(&sql).await?;
250+
let mut plan = ctx.state().create_logical_plan(&sql).await?;
248251

249-
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan {
252+
if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
250253
let builder = get_gcs_object_store_builder(table_url.as_ref(), cmd)?;
251254
// get the actual configuration information, then assert_eq!
252255
let config = [

0 commit comments

Comments
 (0)