Skip to content

Commit d5f19f3

Browse files
eliaperantonialamb
andauthored
Add DataFusionError::Collection to return multiple DataFusionErrors (#14439)
* feat: collect multiple errors when possible * chore: format * doc: explain implementation of `DataFusionError::{message,source}` * feat: update cli lockfile * doc: explain `DataFusionError::iter` * chore: cargo fmt --all * fix: test * chore: update slt files * fix: tests * fix: strip backtrace in slt tests * feat: pr feedback * chore: fix clippy * fix: test * update expected * more update * revert test in errors * try again --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 22fb5f7 commit d5f19f3

File tree

12 files changed

+234
-31
lines changed

12 files changed

+234
-31
lines changed

datafusion/common/src/error.rs

Lines changed: 108 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use std::backtrace::{Backtrace, BacktraceStatus};
2121

2222
use std::borrow::Cow;
23+
use std::collections::VecDeque;
2324
use std::error::Error;
2425
use std::fmt::{Display, Formatter};
2526
use std::io;
@@ -136,6 +137,13 @@ pub enum DataFusionError {
136137
/// human-readable messages, and locations in the source query that relate
137138
/// to the error in some way.
138139
Diagnostic(Box<Diagnostic>, Box<DataFusionError>),
140+
/// A collection of one or more [`DataFusionError`]. Useful in cases where
141+
/// DataFusion can recover from an erroneous state, and produce more errors
142+
/// before terminating. e.g. when planning a SELECT clause, DataFusion can
143+
/// synchronize to the next `SelectItem` if the previous one had errors. The
144+
/// end result is that the user can see errors about all `SelectItem`,
145+
/// instead of just the first one.
146+
Collection(Vec<DataFusionError>),
139147
/// A [`DataFusionError`] which shares an underlying [`DataFusionError`].
140148
///
141149
/// This is useful when the same underlying [`DataFusionError`] is passed
@@ -360,6 +368,14 @@ impl Error for DataFusionError {
360368
DataFusionError::Context(_, e) => Some(e.as_ref()),
361369
DataFusionError::Substrait(_) => None,
362370
DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
371+
// Can't really make a Collection fit into the mold of "an error has
372+
// at most one source", but returning the first one is probably good
373+
// idea. Especially since `DataFusionError::Collection` is mostly
374+
// meant for consumption by the end user, so shouldn't interfere
375+
// with programmatic usage too much. Plus, having 1 or 5 errors
376+
// doesn't really change the fact that the query is invalid and
377+
// can't be executed.
378+
DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error),
363379
DataFusionError::Shared(e) => Some(e.as_ref()),
364380
}
365381
}
@@ -463,18 +479,27 @@ impl DataFusionError {
463479
DataFusionError::ObjectStore(_) => "Object Store error: ",
464480
DataFusionError::IoError(_) => "IO error: ",
465481
DataFusionError::SQL(_, _) => "SQL error: ",
466-
DataFusionError::NotImplemented(_) => "This feature is not implemented: ",
482+
DataFusionError::NotImplemented(_) => {
483+
"This feature is not implemented: "
484+
}
467485
DataFusionError::Internal(_) => "Internal error: ",
468486
DataFusionError::Plan(_) => "Error during planning: ",
469-
DataFusionError::Configuration(_) => "Invalid or Unsupported Configuration: ",
487+
DataFusionError::Configuration(_) => {
488+
"Invalid or Unsupported Configuration: "
489+
}
470490
DataFusionError::SchemaError(_, _) => "Schema error: ",
471491
DataFusionError::Execution(_) => "Execution error: ",
472492
DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ",
473-
DataFusionError::ResourcesExhausted(_) => "Resources exhausted: ",
493+
DataFusionError::ResourcesExhausted(_) => {
494+
"Resources exhausted: "
495+
}
474496
DataFusionError::External(_) => "External error: ",
475497
DataFusionError::Context(_, _) => "",
476498
DataFusionError::Substrait(_) => "Substrait error: ",
477499
DataFusionError::Diagnostic(_, _) => "",
500+
DataFusionError::Collection(errs) => {
501+
errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix()
502+
}
478503
DataFusionError::Shared(_) => "",
479504
}
480505
}
@@ -517,6 +542,13 @@ impl DataFusionError {
517542
}
518543
DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
519544
DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
545+
// Returning the message of the first error is probably fine enough,
546+
// and makes `DataFusionError::Collection` a transparent wrapped,
547+
// unless the end user explicitly calls `DataFusionError::iter`.
548+
DataFusionError::Collection(ref errs) => errs
549+
.first()
550+
.expect("cannot construct DataFusionError::Collection with 0 errors")
551+
.message(),
520552
DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()),
521553
}
522554
}
@@ -569,6 +601,63 @@ impl DataFusionError {
569601

570602
DiagnosticsIterator { head: self }.next()
571603
}
604+
605+
/// Sometimes DataFusion is able to collect multiple errors in a SQL query
606+
/// before terminating, e.g. across different expressions in a SELECT
607+
/// statements or different sides of a UNION. This method returns an
608+
/// iterator over all the errors in the collection.
609+
///
610+
/// For this to work, the top-level error must be a
611+
/// `DataFusionError::Collection`, not something that contains it.
612+
pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> {
613+
struct ErrorIterator<'a> {
614+
queue: VecDeque<&'a DataFusionError>,
615+
}
616+
617+
impl<'a> Iterator for ErrorIterator<'a> {
618+
type Item = &'a DataFusionError;
619+
620+
fn next(&mut self) -> Option<Self::Item> {
621+
loop {
622+
let popped = self.queue.pop_front()?;
623+
match popped {
624+
DataFusionError::Collection(errs) => self.queue.extend(errs),
625+
_ => return Some(popped),
626+
}
627+
}
628+
}
629+
}
630+
631+
let mut queue = VecDeque::new();
632+
queue.push_back(self);
633+
ErrorIterator { queue }
634+
}
635+
}
636+
637+
pub struct DataFusionErrorBuilder(Vec<DataFusionError>);
638+
639+
impl DataFusionErrorBuilder {
640+
pub fn new() -> Self {
641+
Self(Vec::new())
642+
}
643+
644+
pub fn add_error(&mut self, error: DataFusionError) {
645+
self.0.push(error);
646+
}
647+
648+
pub fn error_or<T>(self, ok: T) -> Result<T, DataFusionError> {
649+
match self.0.len() {
650+
0 => Ok(ok),
651+
1 => Err(self.0.into_iter().next().expect("length matched 1")),
652+
_ => Err(DataFusionError::Collection(self.0)),
653+
}
654+
}
655+
}
656+
657+
impl Default for DataFusionErrorBuilder {
658+
fn default() -> Self {
659+
Self::new()
660+
}
572661
}
573662

574663
/// Unwrap an `Option` if possible. Otherwise return an `DataFusionError::Internal`.
@@ -954,4 +1043,20 @@ mod test {
9541043
assert_eq!(e.strip_backtrace(), exp.strip_backtrace());
9551044
assert_eq!(std::mem::discriminant(e), std::mem::discriminant(&exp),)
9561045
}
1046+
1047+
#[test]
1048+
fn test_iter() {
1049+
let err = DataFusionError::Collection(vec![
1050+
DataFusionError::Plan("a".to_string()),
1051+
DataFusionError::Collection(vec![
1052+
DataFusionError::Plan("b".to_string()),
1053+
DataFusionError::Plan("c".to_string()),
1054+
]),
1055+
]);
1056+
let errs = err.iter().collect::<Vec<_>>();
1057+
assert_eq!(errs.len(), 3);
1058+
assert_eq!(errs[0].strip_backtrace(), "Error during planning: a");
1059+
assert_eq!(errs[1].strip_backtrace(), "Error during planning: b");
1060+
assert_eq!(errs[2].strip_backtrace(), "Error during planning: c");
1061+
}
9571062
}

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,9 @@ fn get_valid_types_with_scalar_udf(
261261
TypeSignature::UserDefined => match func.coerce_types(current_types) {
262262
Ok(coerced_types) => Ok(vec![coerced_types]),
263263
Err(e) => exec_err!(
264-
"Function '{}' user-defined coercion failed with {e:?}",
265-
func.name()
264+
"Function '{}' user-defined coercion failed with {:?}",
265+
func.name(),
266+
e.strip_backtrace()
266267
),
267268
},
268269
TypeSignature::OneOf(signatures) => {
@@ -304,8 +305,9 @@ fn get_valid_types_with_aggregate_udf(
304305
Ok(coerced_types) => vec![coerced_types],
305306
Err(e) => {
306307
return exec_err!(
307-
"Function '{}' user-defined coercion failed with {e:?}",
308-
func.name()
308+
"Function '{}' user-defined coercion failed with {:?}",
309+
func.name(),
310+
e.strip_backtrace()
309311
)
310312
}
311313
},
@@ -332,8 +334,9 @@ fn get_valid_types_with_window_udf(
332334
Ok(coerced_types) => vec![coerced_types],
333335
Err(e) => {
334336
return exec_err!(
335-
"Function '{}' user-defined coercion failed with {e:?}",
336-
func.name()
337+
"Function '{}' user-defined coercion failed with {:?}",
338+
func.name(),
339+
e.strip_backtrace()
337340
)
338341
}
339342
},

datafusion/sql/src/select.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use crate::utils::{
2525
CheckColumnsSatisfyExprsPurpose,
2626
};
2727

28+
use datafusion_common::error::DataFusionErrorBuilder;
2829
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
2930
use datafusion_common::{not_impl_err, plan_err, Result};
3031
use datafusion_common::{RecursionUnnestOption, UnnestOptions};
@@ -574,10 +575,15 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
574575
empty_from: bool,
575576
planner_context: &mut PlannerContext,
576577
) -> Result<Vec<Expr>> {
577-
projection
578-
.into_iter()
579-
.map(|expr| self.sql_select_to_rex(expr, plan, empty_from, planner_context))
580-
.collect::<Result<Vec<Expr>>>()
578+
let mut prepared_select_exprs = vec![];
579+
let mut error_builder = DataFusionErrorBuilder::new();
580+
for expr in projection {
581+
match self.sql_select_to_rex(expr, plan, empty_from, planner_context) {
582+
Ok(expr) => prepared_select_exprs.push(expr),
583+
Err(err) => error_builder.add_error(err),
584+
}
585+
}
586+
error_builder.error_or(prepared_select_exprs)
581587
}
582588

583589
/// Generate a relational expression from a select SQL expression

datafusion/sql/src/set_expr.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
// under the License.
1717

1818
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
19-
use datafusion_common::{not_impl_err, plan_err, Diagnostic, Result, Span};
19+
use datafusion_common::{
20+
not_impl_err, plan_err, DataFusionError, Diagnostic, Result, Span,
21+
};
2022
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder};
2123
use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier, Spanned};
2224

@@ -39,8 +41,19 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
3941
} => {
4042
let left_span = Span::try_from_sqlparser_span(left.span());
4143
let right_span = Span::try_from_sqlparser_span(right.span());
42-
let left_plan = self.set_expr_to_plan(*left, planner_context)?;
43-
let right_plan = self.set_expr_to_plan(*right, planner_context)?;
44+
let left_plan = self.set_expr_to_plan(*left, planner_context);
45+
let right_plan = self.set_expr_to_plan(*right, planner_context);
46+
let (left_plan, right_plan) = match (left_plan, right_plan) {
47+
(Ok(left_plan), Ok(right_plan)) => (left_plan, right_plan),
48+
(Err(left_err), Err(right_err)) => {
49+
return Err(DataFusionError::Collection(vec![
50+
left_err, right_err,
51+
]));
52+
}
53+
(Err(err), _) | (_, Err(err)) => {
54+
return Err(err);
55+
}
56+
};
4457
self.validate_set_expr_num_of_columns(
4558
op,
4659
left_span,
@@ -49,6 +62,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
4962
&right_plan,
5063
set_expr_span,
5164
)?;
65+
5266
self.set_operation_to_plan(op, left_plan, right_plan, set_quantifier)
5367
}
5468
SetExpr::Query(q) => self.query_to_plan(*q, planner_context),
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion_common::{assert_contains, DataFusionError};
19+
use datafusion_sql::planner::SqlToRel;
20+
use sqlparser::{dialect::GenericDialect, parser::Parser};
21+
22+
use crate::{MockContextProvider, MockSessionState};
23+
24+
fn do_query(sql: &'static str) -> DataFusionError {
25+
let dialect = GenericDialect {};
26+
let statement = Parser::new(&dialect)
27+
.try_with_sql(sql)
28+
.expect("unable to create parser")
29+
.parse_statement()
30+
.expect("unable to parse query");
31+
let state = MockSessionState::default();
32+
let context = MockContextProvider { state };
33+
let sql_to_rel = SqlToRel::new(&context);
34+
sql_to_rel
35+
.sql_statement_to_plan(statement)
36+
.expect_err("expected error")
37+
}
38+
39+
#[test]
40+
fn test_collect_select_items() {
41+
let query = "SELECT first_namex, last_namex FROM person";
42+
let error = do_query(query);
43+
let errors = error.iter().collect::<Vec<_>>();
44+
assert_eq!(errors.len(), 2);
45+
assert!(errors[0]
46+
.to_string()
47+
.contains("No field named first_namex."));
48+
assert_contains!(errors[1].to_string(), "No field named last_namex.");
49+
}
50+
51+
#[test]
52+
fn test_collect_set_exprs() {
53+
let query = "SELECT first_namex FROM person UNION ALL SELECT last_namex FROM person";
54+
let error = do_query(query);
55+
let errors = error.iter().collect::<Vec<_>>();
56+
assert_eq!(errors.len(), 2);
57+
assert_contains!(errors[0].to_string(), "No field named first_namex.");
58+
assert_contains!(errors[1].to_string(), "No field named last_namex.");
59+
}

datafusion/sql/tests/cases/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
mod collection;
1819
mod diagnostic;
1920
mod plan_to_sql;

datafusion/sql/tests/sql_integration.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4422,10 +4422,17 @@ fn plan_create_index() {
44224422
}
44234423
}
44244424

4425-
fn assert_field_not_found(err: DataFusionError, name: &str) {
4426-
let err = match err {
4427-
DataFusionError::Diagnostic(_, err) => *err,
4428-
err => err,
4425+
fn assert_field_not_found(mut err: DataFusionError, name: &str) {
4426+
let err = loop {
4427+
match err {
4428+
DataFusionError::Diagnostic(_, wrapped_err) => {
4429+
err = *wrapped_err;
4430+
}
4431+
DataFusionError::Collection(errs) => {
4432+
err = errs.into_iter().next().unwrap();
4433+
}
4434+
err => break err,
4435+
}
44294436
};
44304437
match err {
44314438
DataFusionError::SchemaError { .. } => {

datafusion/sqllogictest/src/engines/datafusion_engine/error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ pub enum DFSqlLogicTestError {
3030
#[error("SqlLogicTest error(from sqllogictest-rs crate): {0}")]
3131
SqlLogicTest(#[from] TestError),
3232
/// Error from datafusion
33-
#[error("DataFusion error: {0}")]
33+
#[error("DataFusion error: {}", .0.strip_backtrace())]
3434
DataFusion(#[from] DataFusionError),
3535
/// Error returned when SQL is syntactically incorrect.
3636
#[error("SQL Parser error: {0}")]

datafusion/sqllogictest/test_files/errors.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ SELECT
148148
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type
149149
create table foo as values (1), ('foo');
150150

151-
query error No function matches
151+
query error user-defined coercion failed
152152
select 1 group by substr('');
153153

154154
# Error in filter should be reported

0 commit comments

Comments
 (0)