Skip to content

Commit 6519f8e

Browse files
authored
Minor: improve ParquetOpener docs (#12456)
1 parent 468a5a8 commit 6519f8e

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

datafusion/core/src/datasource/physical_plan/parquet/opener.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,41 @@ use std::sync::Arc;
4141

4242
/// Implements [`FileOpener`] for a parquet file
4343
pub(super) struct ParquetOpener {
44+
/// Execution partition index
4445
pub partition_index: usize,
46+
/// Column indexes in `table_schema` needed by the query
4547
pub projection: Arc<[usize]>,
48+
/// Target number of rows in each output RecordBatch
4649
pub batch_size: usize,
50+
/// Optional limit on the number of rows to read
4751
pub limit: Option<usize>,
52+
/// Optional predicate to apply during the scan
4853
pub predicate: Option<Arc<dyn PhysicalExpr>>,
54+
/// Optional pruning predicate applied to row group statistics
4955
pub pruning_predicate: Option<Arc<PruningPredicate>>,
56+
/// Optional pruning predicate applied to data page statistics
5057
pub page_pruning_predicate: Option<Arc<PagePruningAccessPlanFilter>>,
58+
/// Schema of the output table
5159
pub table_schema: SchemaRef,
60+
/// Optional hint for how large the initial request to read parquet metadata
61+
/// should be
5262
pub metadata_size_hint: Option<usize>,
63+
/// Metrics for reporting
5364
pub metrics: ExecutionPlanMetricsSet,
65+
/// Factory for instantiating parquet reader
5466
pub parquet_file_reader_factory: Arc<dyn ParquetFileReaderFactory>,
67+
/// Should the filters be evaluated during the parquet scan using
68+
/// [`DatafusionArrowPredicate`](row_filter::DatafusionArrowPredicate)?
5569
pub pushdown_filters: bool,
70+
/// Should the filters be reordered to optimize the scan?
5671
pub reorder_filters: bool,
72+
/// Should the page index be read from parquet files, if present, to skip
73+
/// data pages
5774
pub enable_page_index: bool,
75+
/// Should the bloom filter be read from parquet, if present, to skip row
76+
/// groups
5877
pub enable_bloom_filter: bool,
78+
/// Schema adapter factory
5979
pub schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
6080
}
6181

datafusion/core/src/datasource/schema_adapter.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,18 @@ pub trait SchemaAdapter: Send + Sync {
7373
) -> datafusion_common::Result<(Arc<dyn SchemaMapper>, Vec<usize>)>;
7474
}
7575

76-
/// Creates a `SchemaMapping` that can be used to cast or map the columns
77-
/// from the file schema to the table schema.
76+
/// Maps columns from the file schema to the table schema, casting or
77+
/// reordering them as needed.
7878
pub trait SchemaMapper: Debug + Send + Sync {
79-
/// Adapts a `RecordBatch` to match the `table_schema` using the stored mapping and conversions.
79+
/// Adapts a `RecordBatch` to match the `table_schema` using the stored
80+
/// mapping and conversions.
8081
fn map_batch(&self, batch: RecordBatch) -> datafusion_common::Result<RecordBatch>;
8182

82-
/// Adapts a [`RecordBatch`] that does not have all the columns from the
83+
/// Adapts a [`RecordBatch`] that does not have all the columns from the
8384
/// file schema.
8485
///
85-
/// This method is used when applying a filter to a subset of the columns during
86-
/// an `ArrowPredicate`.
86+
/// This method is used when applying a filter to a subset of the columns as
87+
/// part of `DatafusionArrowPredicate` when `pushdown_filters` is enabled.
8788
///
8889
/// This method is slower than `map_batch` as it looks up columns by name.
8990
fn map_partial_batch(
@@ -92,7 +93,7 @@ pub trait SchemaMapper: Debug + Send + Sync {
9293
) -> datafusion_common::Result<RecordBatch>;
9394
}
9495

95-
/// Basic implementation of [`SchemaAdapterFactory`] that maps columns by name
96+
/// Implementation of [`SchemaAdapterFactory`] that maps columns by name
9697
/// and casts columns to the expected type.
9798
#[derive(Clone, Debug, Default)]
9899
pub struct DefaultSchemaAdapterFactory {}

0 commit comments

Comments
 (0)