Skip to content

Commit a1c5e07

Browse files
committed
feat: DataFusionError::find_root
Closes apache#4435.
1 parent 49166ea commit a1c5e07

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed

datafusion/common/src/error.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::error;
2121
use std::fmt::{Display, Formatter};
2222
use std::io;
2323
use std::result;
24+
use std::sync::Arc;
2425

2526
use crate::{Column, DFSchema};
2627
#[cfg(feature = "avro")]
@@ -333,8 +334,101 @@ impl From<DataFusionError> for io::Error {
333334
}
334335
}
335336

337+
/// Helper for [`DataFusionError::find_root`].
338+
enum OtherErr<'a> {
339+
Arrow(&'a ArrowError),
340+
Dyn(&'a (dyn std::error::Error + Send + Sync + 'static)),
341+
}
342+
343+
impl DataFusionError {
344+
/// Get underlying error.
345+
///
346+
/// This may be the same as `self`.
347+
pub fn find_root(&self) -> &Self {
348+
// Note: This is a non-recursive algorithm so we do not run out of stack space, even for long error chains. The
349+
// algorithm will always terminate because all steps access the next error through "converging" ownership,
350+
// i.e. there can be a fan-in by multiple parents (e.g. via `Arc`), but never a fan-out by multiple
351+
// children (e.g. via `Weak` or interior mutability via `Mutex`).
352+
353+
// last error in the chain that was a DataFusionError
354+
let mut checkpoint: &Self = self;
355+
356+
// current non-DataFusion error
357+
let mut other_e: Option<OtherErr<'_>> = None;
358+
359+
loop {
360+
// do we have another error type to explore?
361+
if let Some(inner) = other_e {
362+
// `other_e` is now bound to `inner`, so we can clear this path
363+
other_e = None;
364+
365+
match inner {
366+
OtherErr::Arrow(inner) => {
367+
if let ArrowError::ExternalError(inner) = inner {
368+
other_e = Some(OtherErr::Dyn(inner.as_ref()));
369+
continue;
370+
}
371+
}
372+
OtherErr::Dyn(inner) => {
373+
if let Some(inner) = inner.downcast_ref::<Self>() {
374+
checkpoint = inner;
375+
continue;
376+
}
377+
378+
if let Some(inner) = inner.downcast_ref::<ArrowError>() {
379+
other_e = Some(OtherErr::Arrow(inner));
380+
continue;
381+
}
382+
383+
// some errors are wrapped into `Arc`s to share them with multiple receivers
384+
if let Some(inner) = inner.downcast_ref::<Arc<Self>>() {
385+
checkpoint = inner.as_ref();
386+
continue;
387+
}
388+
389+
if let Some(inner) = inner.downcast_ref::<Arc<ArrowError>>() {
390+
other_e = Some(OtherErr::Arrow(inner.as_ref()));
391+
continue;
392+
}
393+
}
394+
}
395+
396+
// dead end?
397+
break;
398+
}
399+
400+
// traverse context chain
401+
if let Self::Context(_msg, inner) = checkpoint {
402+
checkpoint = inner;
403+
continue;
404+
}
405+
406+
// The Arrow error may itself contain a datafusion error again
407+
// See https://github.com/apache/arrow-datafusion/issues/4172
408+
if let Self::ArrowError(inner) = checkpoint {
409+
other_e = Some(OtherErr::Arrow(inner));
410+
continue;
411+
}
412+
413+
// also try to introspect direct external errors
414+
if let Self::External(inner) = checkpoint {
415+
other_e = Some(OtherErr::Dyn(inner.as_ref()));
416+
continue;
417+
}
418+
419+
// no more traversal
420+
break;
421+
}
422+
423+
// return last checkpoint (which may be the original error)
424+
checkpoint
425+
}
426+
}
427+
336428
#[cfg(test)]
337429
mod test {
430+
use std::sync::Arc;
431+
338432
use crate::error::DataFusionError;
339433
use arrow::error::ArrowError;
340434

@@ -353,6 +447,61 @@ mod test {
353447
assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
354448
}
355449

450+
#[test]
fn test_find_root_error() {
    // every chain below bottoms out in this error
    let root = || DataFusionError::ResourcesExhausted("foo".to_string());

    let chains = vec![
        // context wrapper
        DataFusionError::Context("it happened!".to_string(), Box::new(root())),
        // DataFusion error hidden inside an Arrow external error
        DataFusionError::ArrowError(ArrowError::ExternalError(Box::new(root()))),
        // direct external error
        DataFusionError::External(Box::new(root())),
        // external -> Arrow external -> DataFusion
        DataFusionError::External(Box::new(ArrowError::ExternalError(Box::new(
            root(),
        )))),
        // two nested Arrow external errors
        DataFusionError::ArrowError(ArrowError::ExternalError(Box::new(
            ArrowError::ExternalError(Box::new(root())),
        ))),
        // shared (`Arc`) DataFusion error
        DataFusionError::External(Box::new(Arc::new(root()))),
        // shared (`Arc`) Arrow error wrapping a DataFusion error
        DataFusionError::External(Box::new(Arc::new(ArrowError::ExternalError(
            Box::new(root()),
        )))),
    ];

    for chain in chains {
        do_root_test(chain, root());
    }
}
504+
356505
/// Model what happens when implementing SendableRecordBatchStream:
357506
/// DataFusion code needs to return an ArrowError
358507
#[allow(clippy::try_err)]
@@ -370,6 +519,14 @@ mod test {
370519
Err(ArrowError::SchemaError("bar".to_string()))?;
371520
Ok(())
372521
}
522+
523+
fn do_root_test(e: DataFusionError, exp: DataFusionError) {
524+
let e = e.find_root();
525+
526+
// DataFusionError does not implement Eq, so we use a string comparison + some cheap "same variant" test instead
527+
assert_eq!(e.to_string(), exp.to_string(),);
528+
assert_eq!(std::mem::discriminant(e), std::mem::discriminant(&exp),)
529+
}
373530
}
374531

375532
#[macro_export]

0 commit comments

Comments
 (0)