@@ -41,14 +41,15 @@ use arrow::compute;
41
41
use arrow:: datatypes:: {
42
42
ArrowNativeType , Field , Schema , SchemaBuilder , UInt32Type , UInt64Type ,
43
43
} ;
44
+ use arrow_schema:: SchemaRef ;
44
45
use datafusion_common:: cast:: as_boolean_array;
45
46
use datafusion_common:: stats:: Precision ;
46
47
use datafusion_common:: tree_node:: { Transformed , TransformedResult , TreeNode } ;
47
48
use datafusion_common:: {
48
49
plan_err, DataFusionError , JoinSide , JoinType , Result , SharedResult ,
49
50
} ;
50
51
use datafusion_expr:: interval_arithmetic:: Interval ;
51
- use datafusion_physical_expr:: equivalence:: add_offset_to_expr;
52
+ use datafusion_physical_expr:: equivalence:: { add_offset_to_expr, ProjectionMapping } ;
52
53
use datafusion_physical_expr:: expressions:: Column ;
53
54
use datafusion_physical_expr:: utils:: { collect_columns, merge_vectors} ;
54
55
use datafusion_physical_expr:: {
@@ -62,6 +63,7 @@ use crate::projection::ProjectionExec;
62
63
use futures:: future:: { BoxFuture , Shared } ;
63
64
use futures:: { ready, FutureExt } ;
64
65
use parking_lot:: Mutex ;
66
+ use crate :: common:: can_project;
65
67
66
68
/// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value.
67
69
///
@@ -649,6 +651,66 @@ pub fn build_join_schema(
649
651
( fields. finish ( ) . with_metadata ( metadata) , column_indices)
650
652
}
651
653
654
+ /// This assumes that the projections are relative to the join schema.
655
+ /// We need to redo them to point to the actual hash join output schema
656
+ pub fn remap_join_projections_join_to_output (
657
+ left : Arc < dyn ExecutionPlan > ,
658
+ right : Arc < dyn ExecutionPlan > ,
659
+ join_type : & JoinType ,
660
+ projection : Option < Vec < usize > > ,
661
+ ) -> Result < Option < Vec < usize > > > {
662
+ match projection {
663
+ Some ( ref projection) => {
664
+ let ( join_schema, _) = build_join_schema (
665
+ left. schema ( ) . as_ref ( ) ,
666
+ right. schema ( ) . as_ref ( ) ,
667
+ join_type
668
+ ) ;
669
+
670
+ let join_schema = Arc :: new ( join_schema) ;
671
+ can_project ( & join_schema, Some ( projection. clone ( ) ) . as_ref ( ) ) ?;
672
+
673
+ let projection_exprs = project_index_to_exprs (
674
+ & projection. clone ( ) ,
675
+ & join_schema
676
+ ) ;
677
+ let projection_mapping =
678
+ ProjectionMapping :: try_new ( & projection_exprs, & join_schema) ?;
679
+
680
+ // projection mapping contains from and to, get the second one
681
+ let dest_physical_exprs = projection_mapping. map . iter ( ) . map ( |( _, t) | t. clone ( ) ) . collect :: < Vec < _ > > ( ) ;
682
+ let dest_columns = dest_physical_exprs. iter ( ) . map ( |pe| pe. as_any ( ) . downcast_ref :: < Column > ( ) ) . collect :: < Vec < _ > > ( ) ;
683
+ let output = dest_physical_exprs. iter ( ) . enumerate ( ) . map ( |( idx, _) | {
684
+ // :Vec<(Arc<dyn PhysicalExpr>, String)>
685
+ // (pe.clone(), dest_column.name().to_owned())
686
+ let dest_column = dest_columns. get ( idx) . unwrap ( ) . unwrap ( ) ;
687
+ dest_column. index ( )
688
+ } ) . collect :: < Vec < _ > > ( ) ;
689
+ Ok ( Some ( output) )
690
+ } ,
691
+ None => Ok ( None )
692
+ }
693
+ }
694
+
695
+ pub fn project_index_to_exprs (
696
+ projection_index : & [ usize ] ,
697
+ schema : & SchemaRef ,
698
+ ) -> Vec < ( Arc < dyn PhysicalExpr > , String ) > {
699
+ projection_index
700
+ . iter ( )
701
+ . map ( |index| {
702
+ let field = schema. field ( * index) ;
703
+ (
704
+ Arc :: new ( Column :: new (
705
+ field. name ( ) ,
706
+ * index,
707
+ ) ) as Arc < dyn PhysicalExpr > ,
708
+ field. name ( ) . to_owned ( ) ,
709
+ )
710
+ } )
711
+ . collect :: < Vec < _ > > ( )
712
+ }
713
+
652
714
/// A [`OnceAsync`] runs an `async` closure once, where multiple calls to
653
715
/// [`OnceAsync::once`] return a [`OnceFut`] that resolves to the result of the
654
716
/// same computation.
@@ -1755,6 +1817,7 @@ pub(crate) fn reorder_output_after_swap(
1755
1817
left_schema : & Schema ,
1756
1818
right_schema : & Schema ,
1757
1819
) -> Result < Arc < dyn ExecutionPlan > > {
1820
+ //////////////////////
1758
1821
let proj = ProjectionExec :: try_new (
1759
1822
swap_reverting_projection ( left_schema, right_schema) ,
1760
1823
plan,
@@ -1767,7 +1830,7 @@ pub(crate) fn reorder_output_after_swap(
1767
1830
///
1768
1831
/// Returns the expressions that will allow to swap back the values from the
1769
1832
/// original left as the first columns and those on the right next.
1770
- fn swap_reverting_projection (
1833
+ pub fn swap_reverting_projection (
1771
1834
left_schema : & Schema ,
1772
1835
right_schema : & Schema ,
1773
1836
) -> Vec < ( Arc < dyn PhysicalExpr > , String ) > {
0 commit comments