Skip to content

Commit 8be8b09

Browse files
committed
improve comments
1 parent 08618e8 commit 8be8b09

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

datafusion-examples/examples/parquet_index.rs

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,7 @@ async fn main() -> Result<()> {
113113
.await?
114114
.show()
115115
.await?;
116+
println!("Files pruned: {}", provider.index().last_num_pruned());
116117

117118
// Run a query that uses the index to prune files.
118119
//
@@ -221,8 +222,12 @@ impl TableProvider for IndexTableProvider {
221222
// that always evaluates to true we can pass to the index
222223
.unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true));
223224

224-
// Use the index to find the files that might have data that matches the predicate.
225+
// Use the index to find the files that might have data that matches the
226+
// predicate. Any file that cannot have data that matches the predicate
227+
// will not be returned.
225228
let files = self.index.get_files(predicate.clone())?;
229+
230+
// Transform to the format needed to pass to ParquetExec
226231
// Create one file group per file (default to scanning them all in parallel)
227232
let file_groups = files
228233
.into_iter()
@@ -534,8 +539,8 @@ impl ParquetMetadataIndexBuilder {
534539
assert_eq!(value_column_mins.null_count(), 0);
535540
assert_eq!(value_column_maxes.null_count(), 0);
536541

537-
// compute the total row count, and overall min and max of the "value"
538-
// column in this file
542+
// The statistics above are one per row group so we need to compute the
543+
// overall file row count, and min and max.
539544
let row_count = row_counts
540545
.iter()
541546
.flatten() // skip nulls (should be none)

0 commit comments

Comments
 (0)