hstack
diff --git a/‎datafusion/core/src/datasource/listing/table.rs
+112 b/‎datafusion/core/src/datasource/listing/table.rs
+112
diff --git a/‎datafusion/core/src/datasource/physical_plan/parquet/mod.rs
+2-2 b/‎datafusion/core/src/datasource/physical_plan/parquet/mod.rs
+2-2
diff --git a/‎datafusion/core/src/datasource/physical_plan/parquet/opener.rs
+160-10 b/‎datafusion/core/src/datasource/physical_plan/parquet/opener.rs
+160-10
@@ -958,6 +958,118 @@ impl TableProvider for ListingTable {
             .await
     }
 
+    /// Builds the physical plan for scanning this table, passing both the column
+    /// `projection` and the nested `projection_deep` through to the
+    /// [`FileScanConfig`] handed to the file format.
+    ///
+    /// Steps, in order:
+    /// 1. Split `filters` into those that can be evaluated from partition columns
+    ///    alone and the remaining ones.
+    /// 2. List the files to scan, pruned by the partition-only filters.
+    /// 3. If `split_file_groups_by_statistics` is enabled and an output ordering
+    ///    exists, try to regroup the files by statistics.
+    /// 4. Combine the remaining filters into one physical expression.
+    /// 5. Delegate to `self.options.format.create_physical_plan`.
+    ///
+    /// # Arguments
+    /// * `state` - session used for configuration and execution properties.
+    /// * `projection` - optional column indices to read.
+    /// * `projection_deep` - optional per-column nested projection; presumably maps a
+    ///   column index to the nested field paths to keep (TODO confirm against
+    ///   `FileScanConfig::with_projection_deep`).
+    /// * `filters` - filter expressions offered to the scan.
+    /// * `limit` - optional row limit forwarded to the scan configuration.
+    ///
+    /// # Returns
+    /// An [`EmptyExec`] with the projected table schema when no files need to be
+    /// read, an [`EmptyExec`] with an empty schema when the table has no paths, and
+    /// otherwise the plan produced by the file format.
+    ///
+    /// # Panics
+    /// Panics if `state` is not a [`SessionState`] (the downcast is unwrapped).
+    async fn scan_deep(
+        &self,
+        state: &dyn Session,
+        projection: Option<&Vec<usize>>,
+        projection_deep: Option<&HashMap<usize, Vec<String>>>,
+        filters: &[Expr],
+        limit: Option<usize>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        // extract types of partition columns
+        let table_partition_cols = self
+            .options
+            .table_partition_cols
+            .iter()
+            .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone()))
+            .collect::<Result<Vec<_>>>()?;
+
+        let table_partition_col_names = table_partition_cols
+            .iter()
+            .map(|field| field.name().as_str())
+            .collect::<Vec<_>>();
+        // If the filters can be resolved using only partition cols, there is no need to
+        // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated
+        let (partition_filters, filters): (Vec<_>, Vec<_>) =
+            filters.iter().cloned().partition(|filter| {
+                can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
+            });
+        // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here?
+        // NOTE: this unwrap panics when `state` is any other `Session` implementation.
+        let session_state = state.as_any().downcast_ref::<SessionState>().unwrap();
+
+        // We should not limit the number of partitioned files to scan if there are filters and limit
+        // at the same time. This is because the limit should be applied after the filters are applied.
+        let statistic_file_limit = if filters.is_empty() { limit } else { None };
+
+        let (mut partitioned_file_lists, statistics) = self
+            .list_files_for_scan(session_state, &partition_filters, statistic_file_limit)
+            .await?;
+
+        // if no files need to be read, return an `EmptyExec`
+        if partitioned_file_lists.is_empty() {
+            let projected_schema = project_schema(&self.schema(), projection)?;
+            return Ok(Arc::new(EmptyExec::new(projected_schema)));
+        }
+
+        let output_ordering = self.try_create_output_ordering()?;
+        // Regrouping is best effort: a failure or too many groups only logs at debug
+        // level and the original file groups are kept.
+        match state
+            .config_options()
+            .execution
+            .split_file_groups_by_statistics
+            .then(|| {
+                output_ordering.first().map(|output_ordering| {
+                    FileScanConfig::split_groups_by_statistics(
+                        &self.table_schema,
+                        &partitioned_file_lists,
+                        output_ordering,
+                    )
+                })
+            })
+            .flatten()
+        {
+            Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"),
+            Some(Ok(new_groups)) => {
+                if new_groups.len() <= self.options.target_partitions {
+                    partitioned_file_lists = new_groups;
+                } else {
+                    log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered")
+                }
+            }
+            None => {} // no ordering required
+        };
+
+        // Only the non-partition filters remain here; they are AND-ed together and
+        // planned against the full table schema.
+        let filters = match conjunction(filters.to_vec()) {
+            Some(expr) => {
+                let table_df_schema = self.table_schema.as_ref().clone().to_dfschema()?;
+                let filters = create_physical_expr(
+                    &expr,
+                    &table_df_schema,
+                    state.execution_props(),
+                )?;
+                Some(filters)
+            }
+            None => None,
+        };
+
+        // Without any table path there is no object store to read from; note that this
+        // returns an empty schema rather than the projected table schema used above.
+        let Some(object_store_url) =
+            self.table_paths.first().map(ListingTableUrl::object_store)
+        else {
+            return Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty()))));
+        };
+
+        // create the execution plan
+        self.options
+            .format
+            .create_physical_plan(
+                session_state,
+                FileScanConfig::new(
+                    object_store_url,
+                    Arc::clone(&self.file_schema),
+                    self.options.format.file_source(),
+                )
+                .with_file_groups(partitioned_file_lists)
+                .with_constraints(self.constraints.clone())
+                .with_statistics(statistics)
+                .with_projection(projection.cloned())
+                .with_projection_deep(projection_deep.cloned())
+                .with_limit(limit)
+                .with_output_ordering(output_ordering)
+                .with_table_partition_cols(table_partition_cols),
+                filters.as_ref(),
+            )
+            .await
+    }
+
     fn supports_filters_pushdown(
         &self,
         filters: &[&Expr],
 
@@ -233,8 +233,8 @@ impl ParquetExecBuilder {
         }
 
         let base_config = file_scan_config.with_source(Arc::new(parquet.clone()));
-        debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}",
-        base_config.file_groups, base_config.projection, predicate, base_config.limit);
+        debug!("Creating ParquetExec, files: {:?}, projection {:?}, projection deep {:?}, predicate: {:?}, limit: {:?}",
+        base_config.file_groups, base_config.projection, base_config.projection_deep, predicate, base_config.limit);
 
         ParquetExec {
             inner: DataSourceExec::new(Arc::new(base_config.clone())),
 
@@ -17,8 +17,6 @@
 
 //! [`ParquetOpener`] for opening Parquet files
 
-use std::sync::Arc;
-
 use crate::datasource::file_format::parquet::{
     coerce_file_schema_to_string_type, coerce_file_schema_to_view_type,
 };
@@ -31,6 +29,9 @@ use crate::datasource::physical_plan::{
     FileMeta, FileOpenFuture, FileOpener, ParquetFileMetrics, ParquetFileReaderFactory,
 };
 use crate::datasource::schema_adapter::SchemaAdapterFactory;
+use std::cmp::min;
+use std::collections::HashMap;
+use std::sync::Arc;
 
 use arrow::datatypes::SchemaRef;
 use arrow::error::ArrowError;
@@ -40,18 +41,21 @@ use datafusion_physical_optimizer::pruning::PruningPredicate;
 use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
 
 use futures::{StreamExt, TryStreamExt};
-use log::debug;
+use log::{debug, info, trace};
 use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::schema::types::SchemaDescriptor;
+// use datafusion_common::DataFusionError;
+use datafusion_common::deep::{has_deep_projection, rewrite_schema, splat_columns};
 
 /// Implements [`FileOpener`] for a parquet file
 pub(super) struct ParquetOpener {
     /// Execution partition index
     pub partition_index: usize,
     /// Column indexes in `table_schema` needed by the query
     pub projection: Arc<[usize]>,
-    /// Target number of rows in each output RecordBatch
+    pub projection_deep: Arc<HashMap<usize, Vec<String>>>,
     pub batch_size: usize,
     /// Optional limit on the number of rows to read
     pub limit: Option<usize>,
@@ -105,11 +109,31 @@ impl FileOpener for ParquetOpener {
 
         let batch_size = self.batch_size;
 
-        let projected_schema =
-            SchemaRef::from(self.table_schema.project(&self.projection)?);
+        let projection = self.projection.clone();
+        let projection_vec = projection
+            .as_ref()
+            .iter()
+            .map(|i| *i)
+            .collect::<Vec<usize>>();
+        info!(
+            "ParquetOpener::open projection={:?}, deep_projection: {:?}",
+            projection, &self.projection_deep
+        );
+        // FIXME @HStack: ADR: why do we need to do this ? our function needs another param maybe ?
+        // In the case when the projections requested are empty, we should return an empty schema
+        let projected_schema = if projection_vec.len() == 0 {
+            SchemaRef::from(self.table_schema.project(&projection)?)
+        } else {
+            rewrite_schema(
+                self.table_schema.clone(),
+                &projection_vec,
+                self.projection_deep.as_ref(),
+            )
+        };
         let schema_adapter = self
             .schema_adapter_factory
             .create(projected_schema, Arc::clone(&self.table_schema));
+        let projection_deep = self.projection_deep.clone();
         let predicate = self.predicate.clone();
         let pruning_predicate = self.pruning_predicate.clone();
         let page_pruning_predicate = self.page_pruning_predicate.clone();
@@ -159,11 +183,37 @@ impl FileOpener for ParquetOpener {
             let (schema_mapping, adapted_projections) =
                 schema_adapter.map_schema(&file_schema)?;
 
-            let mask = ProjectionMask::roots(
-                builder.parquet_schema(),
-                adapted_projections.iter().cloned(),
-            );
+            // let mask = ProjectionMask::roots(
+            //     builder.parquet_schema(),
+            //     adapted_projections.iter().cloned(),
+            // );
+            let mask = if has_deep_projection(Some(projection_deep.clone().as_ref())) {
+                let leaves = generate_leaf_paths(
+                    table_schema.clone(),
+                    builder.parquet_schema(),
+                    &projection_vec,
+                    projection_deep.clone().as_ref(),
+                );
+                info!(
+                    "ParquetOpener::open, using deep projection parquet leaves: {:?}",
+                    leaves.clone()
+                );
+                // let tmp = builder.parquet_schema();
+                // for (i, col) in tmp.columns().iter().enumerate() {
+                //     info!("  {}  {}= {:?}", i, col.path(), col);
+                // }
+                ProjectionMask::leaves(builder.parquet_schema(), leaves)
+            } else {
+                info!(
+                    "ParquetOpener::open, using root projections: {:?}",
+                    &adapted_projections
+                );
 
+                ProjectionMask::roots(
+                    builder.parquet_schema(),
+                    adapted_projections.iter().cloned(),
+                )
+            };
             // Filter pushdown: evaluate predicates during scan
             if let Some(predicate) = pushdown_filters.then_some(predicate).flatten() {
                 let row_filter = row_filter::build_row_filter(
@@ -303,3 +353,103 @@ fn create_initial_plan(
     // default to scanning all row groups
     Ok(ParquetAccessPlan::new_all(row_group_count))
 }
+
+// FIXME: @HStack ACTUALLY look at the arrow schema and handle map types correctly
+//  Right now, we are matching "map-like" parquet leaves like "key_value.key" etc
+//  But, we need to walk through both the arrow schema (which KNOWS about the map type)
+//  and the parquet leaves to do this correctly.
+/// Lists every leaf column of `parquet_schema` together with its path in two forms.
+///
+/// Each entry is `(leaf_index, (original_path, converted_path))`, where
+/// `leaf_index` is the position of the column in `parquet_schema.columns()`,
+/// `original_path` is the dotted parquet column path, and `converted_path` is that
+/// path after [`convert_parquet_path_to_deep_projection_path`].
+///
+/// `_arrow_schema` is currently unused; it is kept for the schema-aware
+/// implementation described in the FIXME above.
+fn equivalent_projection_paths_from_parquet_schema(
+    _arrow_schema: SchemaRef,
+    parquet_schema: &SchemaDescriptor,
+) -> Vec<(usize, (String, String))> {
+    parquet_schema
+        .columns()
+        .iter()
+        .enumerate()
+        .map(|(leaf_index, column)| {
+            let original_path = column.path().string();
+            let converted_path =
+                convert_parquet_path_to_deep_projection_path(&original_path);
+            (leaf_index, (original_path, converted_path))
+        })
+        .collect()
+}
+
+/// Rewrites a dotted parquet leaf path into the form used by deep-projection filters.
+///
+/// Every two-segment pair that this module treats as list/map plumbing
+/// (`list.element`, `key_value.key`, `key_value.value`, `entries.keys`,
+/// `entries.values`) is collapsed into a single `*` segment, so
+/// `a.key_value.value.b` becomes `a.*.b`. Any other path is returned unchanged.
+///
+/// Matching is done on whole path segments, and the pair must be preceded by at
+/// least one segment (the column that owns the list/map). A plain substring
+/// replacement would also rewrite unrelated names such as `a.list.elements` or
+/// `a.wishlist.element`.
+///
+/// The detection is purely name based: a struct that really is called `list` with
+/// a child called `element` is collapsed as well.
+fn convert_parquet_path_to_deep_projection_path(parquet_path: &str) -> String {
+    // (outer, inner) segment pairs that are collapsed into `*`.
+    const WRAPPER_PAIRS: [(&str, &str); 5] = [
+        ("key_value", "key"),
+        ("key_value", "value"),
+        ("entries", "keys"),
+        ("entries", "values"),
+        ("list", "element"),
+    ];
+
+    let segments: Vec<&str> = parquet_path.split('.').collect();
+    let mut converted: Vec<&str> = Vec::with_capacity(segments.len());
+    let mut idx = 0;
+    while idx < segments.len() {
+        // A wrapper never starts at the root: it always hangs off an owning column.
+        let is_wrapper = idx > 0
+            && idx + 1 < segments.len()
+            && WRAPPER_PAIRS.iter().any(|&(outer, inner)| {
+                segments[idx] == outer && segments[idx + 1] == inner
+            });
+        if is_wrapper {
+            converted.push("*");
+            idx += 2;
+        } else {
+            converted.push(segments[idx]);
+            idx += 1;
+        }
+    }
+    converted.join(".")
+}
+
+/// Returns the indices of the parquet leaf columns selected by the projection.
+///
+/// The requested columns (all fields of `arrow_schema` when `projection` is empty)
+/// are expanded by `splat_columns` into dotted filter paths. A parquet leaf is
+/// kept when, for at least one filter, every segment in the prefix shared by the
+/// filter and the converted leaf path is either equal or `*` on the filter side.
+/// So a filter shorter than the leaf path selects everything beneath it, and a
+/// filter longer than the leaf path also selects that leaf.
+///
+/// The returned indices are positions in `parquet_schema.columns()`, in ascending
+/// order.
+fn generate_leaf_paths(
+    arrow_schema: SchemaRef,
+    parquet_schema: &SchemaDescriptor,
+    projection: &Vec<usize>,
+    projection_deep: &HashMap<usize, Vec<String>>,
+) -> Vec<usize> {
+    let actual_projection = if projection.is_empty() {
+        (0..arrow_schema.fields().len()).collect()
+    } else {
+        projection.clone()
+    };
+    let splatted =
+        splat_columns(arrow_schema.clone(), &actual_projection, &projection_deep);
+    trace!(target: "deep", "generate_leaf_paths: splatted: {:?}", &splatted);
+
+    // Split every filter once up front instead of once per parquet leaf.
+    let filters: Vec<Vec<&str>> = splatted
+        .iter()
+        .map(|filter| filter.split('.').collect())
+        .collect();
+
+    equivalent_projection_paths_from_parquet_schema(arrow_schema, parquet_schema)
+        .into_iter()
+        .filter(|(leaf_index, (original, converted))| {
+            // FIXME: @HStack
+            //  for map fields, the actual parquet paths look like x.y.z.key_value.key, x.y.z.key_value.value
+            //  since we are ignoring these names in the paths, we need to actually collapse this access to a *
+            //  so we can filter for them
+            //  also, we need BOTH the key and the value for maps otherwise we run into an arrow-rs error
+            //  "partial projection of MapArray is not supported"
+
+            trace!(target: "deep", "  generate_leaf_paths looking at index {} {} =  {}", leaf_index, original, converted);
+
+            let col_pieces: Vec<&str> = converted.split('.').collect();
+            filters.iter().any(|filter_pieces| {
+                // Only the shared prefix is compared; whichever side is longer is
+                // treated as matching beyond that point.
+                let depth = min(filter_pieces.len(), col_pieces.len());
+                filter_pieces[..depth]
+                    .iter()
+                    .zip(&col_pieces[..depth])
+                    .all(|(wanted, actual)| *wanted == "*" || wanted == actual)
+            })
+        })
+        .map(|(leaf_index, _)| leaf_index)
+        .collect()
+}
Original file line number	Diff line number	Diff line change
`@@ -233,8 +233,8 @@ impl ParquetExecBuilder {`
`233`	`233`	`}`
`234`	`234`
`235`	`235`	`let base_config = file_scan_config.with_source(Arc::new(parquet.clone()));`
`236`		`- debug!("Creating ParquetExec, files: {:?}, projection {:?}, predicate: {:?}, limit: {:?}",`
`237`		`- base_config.file_groups, base_config.projection, predicate, base_config.limit);`
	`236`	`+ debug!("Creating ParquetExec, files: {:?}, projection {:?}, projection deep {:?}, predicate: {:?}, limit: {:?}",`
	`237`	`+ base_config.file_groups, base_config.projection, base_config.projection_deep, predicate, base_config.limit);`
`238`	`238`
`239`	`239`	`ParquetExec {`
`240`	`240`	`inner: DataSourceExec::new(Arc::new(base_config.clone())),`