@@ -26,7 +26,7 @@ use async_trait::async_trait;
26
26
use bytes:: Bytes ;
27
27
use datafusion:: datasource:: listing:: PartitionedFile ;
28
28
use datafusion:: datasource:: physical_plan:: parquet:: {
29
- ParquetAccessPlan , RequestedStatistics , RowGroupAccess , StatisticsConverter ,
29
+ ParquetAccessPlan , RowGroupAccess , StatisticsConverter ,
30
30
} ;
31
31
use datafusion:: datasource:: physical_plan:: {
32
32
parquet:: ParquetFileReaderFactory , FileMeta , FileScanConfig , ParquetExec ,
@@ -712,20 +712,25 @@ impl ParquetRowGroupMetadataIndexBuilder {
712
712
let num_row_groups = metadata. num_row_groups ( ) ;
713
713
714
714
// Extract the min/max values for each row group from the statistics
715
- // TODO make an API that permits appending a row group at a time
716
- let row_counts = StatisticsConverter :: row_counts ( metadata) ?;
717
- let value_column_mins =
718
- StatisticsConverter :: try_new ( "value" , RequestedStatistics :: Min , schema) ?
719
- . extract ( metadata) ?;
720
- let value_column_maxes =
721
- StatisticsConverter :: try_new ( "value" , RequestedStatistics :: Max , schema) ?
722
- . extract ( metadata) ?;
723
- let tag_column_mins =
724
- StatisticsConverter :: try_new ( "tag" , RequestedStatistics :: Min , schema) ?
725
- . extract ( metadata) ?;
726
- let tag_column_maxes =
727
- StatisticsConverter :: try_new ( "tag" , RequestedStatistics :: Max , schema) ?
728
- . extract ( metadata) ?;
715
+ let row_group_meta = metadata. row_groups ( ) ;
716
+
717
+ let row_counts = StatisticsConverter :: row_group_row_counts ( row_group_meta) ?;
718
+
719
+ let value_converter = StatisticsConverter :: try_new (
720
+ "value" ,
721
+ schema,
722
+ metadata. file_metadata ( ) . schema_descr ( ) ,
723
+ ) ?;
724
+ let value_column_mins = value_converter. row_group_mins ( row_group_meta) ?;
725
+ let value_column_maxes = value_converter. row_group_maxes ( row_group_meta) ?;
726
+
727
+ let tag_converter = StatisticsConverter :: try_new (
728
+ "tag" ,
729
+ schema,
730
+ metadata. file_metadata ( ) . schema_descr ( ) ,
731
+ ) ?;
732
+ let tag_column_mins = tag_converter. row_group_mins ( row_group_meta) ?;
733
+ let tag_column_maxes = tag_converter. row_group_maxes ( row_group_meta) ?;
729
734
730
735
// sanity check the statistics
731
736
assert_eq ! ( row_counts. len( ) , num_row_groups) ;
0 commit comments