@@ -113,6 +113,7 @@ async fn main() -> Result<()> {
113
113
. await ?
114
114
. show ( )
115
115
. await ?;
116
+ println ! ( "Files pruned: {}" , provider. index( ) . last_num_pruned( ) ) ;
116
117
117
118
// Run a query that uses the index to prune files.
118
119
//
@@ -221,8 +222,12 @@ impl TableProvider for IndexTableProvider {
221
222
// that always evaluates to true we can pass to the index
222
223
. unwrap_or_else ( || datafusion_physical_expr:: expressions:: lit ( true ) ) ;
223
224
224
- // Use the index to find the files that might have data that matches the predicate.
225
+ // Use the index to find the files that might have data that matches the
226
+ // predicate. Any file that cannot have data that matches the predicate
227
+ // will not be returned.
225
228
let files = self . index . get_files ( predicate. clone ( ) ) ?;
229
+
230
+ // Transform to the format needed to pass to ParquetExec
226
231
// Create one file group per file (default to scanning them all in parallel)
227
232
let file_groups = files
228
233
. into_iter ( )
@@ -534,8 +539,8 @@ impl ParquetMetadataIndexBuilder {
534
539
assert_eq ! ( value_column_mins. null_count( ) , 0 ) ;
535
540
assert_eq ! ( value_column_maxes. null_count( ) , 0 ) ;
536
541
537
- // compute the total row count, and overall min and max of the "value"
538
- // column in this file
542
+ // The statistics above are one per row group, so we need to compute the
543
+ // overall file row count, and min and max.
539
544
let row_count = row_counts
540
545
. iter ( )
541
546
. flatten ( ) // skip nulls (should be none)
0 commit comments