Skip to content

Commit ade44df

Browse files
committed
update API
1 parent ac38bc4 commit ade44df

File tree

1 file changed

+20
-15
lines changed

1 file changed

+20
-15
lines changed

datafusion-examples/examples/advanced_parquet_index.rs

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ use async_trait::async_trait;
26 26
use bytes::Bytes;
27 27
use datafusion::datasource::listing::PartitionedFile;
28 28
use datafusion::datasource::physical_plan::parquet::{
29-
ParquetAccessPlan, RequestedStatistics, RowGroupAccess, StatisticsConverter,
29+
ParquetAccessPlan, RowGroupAccess, StatisticsConverter,
30 30
};
31 31
use datafusion::datasource::physical_plan::{
32 32
parquet::ParquetFileReaderFactory, FileMeta, FileScanConfig, ParquetExec,
@@ -712,20 +712,25 @@ impl ParquetRowGroupMetadataIndexBuilder {
712 712
let num_row_groups = metadata.num_row_groups();
713 713

714 714
// Extract the min/max values for each row group from the statistics
715-
// TODO make an API that permits appending a row group at a time
716-
let row_counts = StatisticsConverter::row_counts(metadata)?;
717-
let value_column_mins =
718-
StatisticsConverter::try_new("value", RequestedStatistics::Min, schema)?
719-
.extract(metadata)?;
720-
let value_column_maxes =
721-
StatisticsConverter::try_new("value", RequestedStatistics::Max, schema)?
722-
.extract(metadata)?;
723-
let tag_column_mins =
724-
StatisticsConverter::try_new("tag", RequestedStatistics::Min, schema)?
725-
.extract(metadata)?;
726-
let tag_column_maxes =
727-
StatisticsConverter::try_new("tag", RequestedStatistics::Max, schema)?
728-
.extract(metadata)?;
715+
let row_group_meta = metadata.row_groups();
716+
717+
let row_counts = StatisticsConverter::row_group_row_counts(row_group_meta)?;
718+
719+
let value_converter = StatisticsConverter::try_new(
720+
"value",
721+
schema,
722+
metadata.file_metadata().schema_descr(),
723+
)?;
724+
let value_column_mins = value_converter.row_group_mins(row_group_meta)?;
725+
let value_column_maxes = value_converter.row_group_maxes(row_group_meta)?;
726+
727+
let tag_converter = StatisticsConverter::try_new(
728+
"tag",
729+
schema,
730+
metadata.file_metadata().schema_descr(),
731+
)?;
732+
let tag_column_mins = tag_converter.row_group_mins(row_group_meta)?;
733+
let tag_column_maxes = tag_converter.row_group_maxes(row_group_meta)?;
729 734

730 735
// sanity check the statistics
731 736
assert_eq!(row_counts.len(), num_row_groups);

0 commit comments

Comments
 (0)