@@ -27,30 +27,39 @@ use arrow::util::display::{ArrayFormatter, FormatOptions};
 use datafusion::arrow::datatypes::Schema;
 use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow};
 use datafusion::arrow::util::pretty;
-use datafusion::common::UnnestOptions;
+use datafusion::common::stats::Precision;
+use datafusion::common::{DFSchema, DataFusionError, Statistics, UnnestOptions};
+use datafusion::common::tree_node::{Transformed, TreeNode};
 use datafusion::config::{CsvOptions, TableParquetOptions};
 use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
+use datafusion::datasource::memory::DataSourceExec;
 use datafusion::datasource::TableProvider;
-use datafusion::execution::SendableRecordBatchStream;
+use datafusion::datasource::physical_plan::FileScanConfig;
+use datafusion::datasource::source::DataSource;
+use datafusion::execution::SendableRecordBatchStream;
 use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel};
+use datafusion::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
 use datafusion::prelude::*;
+
+use datafusion_proto::physical_plan::{AsExecutionPlan, PhysicalExtensionCodec};
+use datafusion_proto::protobuf::PhysicalPlanNode;
+use deltalake::delta_datafusion::DeltaPhysicalCodec;
+use prost::Message;
 use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::pybacked::PyBackedStr;
 use pyo3::types::{PyCapsule, PyTuple, PyTupleMethods};
 use tokio::task::JoinHandle;
 
 use crate::catalog::PyTable;
+use crate::common::df_schema::PyDFSchema;
 use crate::errors::{py_datafusion_err, PyDataFusionError};
 use crate::expr::sort_expr::to_sort_expressions;
 use crate::physical_plan::PyExecutionPlan;
 use crate::record_batch::PyRecordBatchStream;
 use crate::sql::logical::PyLogicalPlan;
 use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future};
-use crate::{
-    errors::PyDataFusionResult,
-    expr::{sort_expr::PySortExpr, PyExpr},
-};
+use crate::{errors::PyDataFusionResult, expr::{sort_expr::PySortExpr, PyExpr}};
 
 // https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
 // - we have not decided on the table_provider approach yet
@@ -697,6 +706,139 @@ impl PyDataFrame {
     fn count(&self, py: Python) -> PyDataFusionResult<usize> {
         Ok(wait_for_future(py, self.df.as_ref().clone().count())?)
     }
+
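+    /// Build a [`DistributedPlan`] for this DataFrame without executing it.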
+    fn distributed_plan(&self, py: Python<'_>) -> PyResult<DistributedPlan> {
+        let future_plan = DistributedPlan::try_new(self.df.as_ref());
+        wait_for_future(py, future_plan).map_err(py_datafusion_err)
+    }
+}
+
+#[pyclass(get_all)]
+#[derive(Debug, Clone)]
+pub struct DistributedPlan {
+    repartition_file_min_size: usize,
+    physical_plan: PyExecutionPlan,
+}
+
+#[pymethods]
+impl DistributedPlan {
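+    /// Encode the physical plan to protobuf bytes using the Delta-aware extension codec.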
+    fn serialize(&self) -> PyResult<Vec<u8>> {
+        PhysicalPlanNode::try_from_physical_plan(self.plan().clone(), codec())
+            .map(|node| node.encode_to_vec())
+            .map_err(py_datafusion_err)
+    }
+
+    fn partition_count(&self) -> usize {
+        self.plan().output_partitioning().partition_count()
+    }
+
+    fn num_bytes(&self) -> Option<usize> {
+        self.stats_field(|stats| stats.total_byte_size)
+    }
+
+    fn num_rows(&self) -> Option<usize> {
+        self.stats_field(|stats| stats.num_rows)
+    }
+
+    fn schema(&self) -> PyResult<PyDFSchema> {
+        DFSchema::try_from(self.plan().schema())
+            .map(PyDFSchema::from)
+            .map_err(py_datafusion_err)
+    }
+
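+    /// Try to re-split the file scans so the plan yields `desired_parallelism` output partitions.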
+    fn set_desired_parallelism(&mut self, desired_parallelism: usize) -> PyResult<()> {
+        if self.plan().output_partitioning().partition_count() == desired_parallelism {
+            return Ok(());
+        }
+        let updated_plan = self.plan().clone().transform_up(|node| {
+            if let Some(exec) = node.as_any().downcast_ref::<DataSourceExec>() {
+                // Remove redundant ranges from partition files because FileScanConfig refuses to repartition
+                // if any file has a range defined (even when the range actually covers the entire file).
+                // The EnforceDistribution optimizer rule adds ranges for both full and partial files,
+                // so this tries to revert that in order to trigger a repartition when no files are actually split.
+                if let Some(file_scan) = exec.data_source().as_any().downcast_ref::<FileScanConfig>() {
+                    let mut range_free_file_scan = file_scan.clone();
+                    for group in range_free_file_scan.file_groups.iter_mut() {
+                        for file in group.iter_mut() {
+                            if let Some(range) = &file.range {
+                                if range.start == 0 && range.end == file.object_meta.size as i64 {
+                                    file.range = None; // remove the redundant range
+                                }
+                            }
+                        }
+                    }
+                    let ordering = range_free_file_scan.eq_properties().output_ordering();
+                    if let Some(repartitioned) = range_free_file_scan
+                        .repartitioned(desired_parallelism, self.repartition_file_min_size, ordering)? {
+                        return Ok(Transformed::yes(Arc::new(DataSourceExec::new(repartitioned))));
+                    }
+                }
+            }
+            Ok(Transformed::no(node))
+        }).map_err(py_datafusion_err)?.data;
+        self.physical_plan = PyExecutionPlan::new(updated_plan);
+        Ok(())
+    }
+}
+
+impl DistributedPlan {
+    async fn try_new(df: &DataFrame) -> Result<Self, DataFusionError> {
+        let (mut session_state, logical_plan) = df.clone().into_parts();
+        let repartition_file_min_size = session_state.config_options().optimizer.repartition_file_min_size;
+        // Create the physical plan with a single partition, to ensure that no files are split into ranges.
+        // Otherwise, any subsequent repartition attempt would fail (see the comment in `set_desired_parallelism`).
+        session_state.config_mut().options_mut().execution.target_partitions = 1;
+        let physical_plan = session_state.create_physical_plan(&logical_plan).await?;
+        let physical_plan = PyExecutionPlan::new(physical_plan);
+        Ok(Self {
+            repartition_file_min_size,
+            physical_plan,
+        })
+    }
+
+    fn plan(&self) -> &Arc<dyn ExecutionPlan> {
+        &self.physical_plan.plan
+    }
+
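+    // Return a statistic only when it is exactly known; inexact estimates map to None.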
+    fn stats_field(&self, field: fn(Statistics) -> Precision<usize>) -> Option<usize> {
+        if let Ok(stats) = self.plan().statistics() {
+            match field(stats) {
+                Precision::Exact(n) => Some(n),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+}
+
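+/// Decode a serialized physical plan and execute a single partition, returning its record batch stream.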
+#[pyfunction]
+pub fn partition_stream(serialized_plan: &[u8], partition: usize, py: Python) -> PyResult<PyRecordBatchStream> {
+    deltalake::ensure_initialized();
+    let node = PhysicalPlanNode::decode(serialized_plan)
+        .map_err(|e| DataFusionError::External(Box::new(e)))
+        .map_err(py_datafusion_err)?;
+    let ctx = SessionContext::new();
+    let plan = node.try_into_physical_plan(&ctx, ctx.runtime_env().as_ref(), codec())
+        .map_err(py_datafusion_err)?;
+    let stream_with_runtime = get_tokio_runtime().0.spawn(async move {
+        plan.execute(partition, ctx.task_ctx())
+    });
+    wait_for_future(py, stream_with_runtime)
+        .map_err(py_datafusion_err)?
+        .map(PyRecordBatchStream::new)
+        .map_err(py_datafusion_err)
+}
+
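+// Extension codec that can also (de)serialize Delta Lake scan nodes.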
+fn codec() -> &'static dyn PhysicalExtensionCodec {
+    static CODEC: DeltaPhysicalCodec = DeltaPhysicalCodec {};
+    &CODEC
 }
 
 /// Print DataFrame