@@ -81,8 +81,10 @@ enum Scenario {
81
81
}
82
82
83
83
enum Unit {
84
- RowGroup ,
85
- Page ,
84
+ // maximum number of rows per row group, passed to the parquet writer
85
+ RowGroup ( usize ) ,
86
+ // maximum number of rows per data page, passed to the parquet writer
87
+ Page ( usize ) ,
86
88
}
87
89
88
90
/// Test fixture that has an execution context that has an external
@@ -185,13 +187,13 @@ impl ContextWithParquet {
185
187
mut config : SessionConfig ,
186
188
) -> Self {
187
189
let file = match unit {
188
- Unit :: RowGroup => {
190
+ Unit :: RowGroup ( row_per_group ) => {
189
191
config = config. with_parquet_bloom_filter_pruning ( true ) ;
190
- make_test_file_rg ( scenario) . await
192
+ make_test_file_rg ( scenario, row_per_group ) . await
191
193
}
192
- Unit :: Page => {
194
+ Unit :: Page ( row_per_page ) => {
193
195
config = config. with_parquet_page_index_pruning ( true ) ;
194
- make_test_file_page ( scenario) . await
196
+ make_test_file_page ( scenario, row_per_page ) . await
195
197
}
196
198
} ;
197
199
let parquet_path = file. path ( ) . to_string_lossy ( ) ;
@@ -880,15 +882,15 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
880
882
}
881
883
882
884
/// Create a test parquet file with various data types
883
- async fn make_test_file_rg ( scenario : Scenario ) -> NamedTempFile {
885
+ async fn make_test_file_rg ( scenario : Scenario , row_per_group : usize ) -> NamedTempFile {
884
886
let mut output_file = tempfile:: Builder :: new ( )
885
887
. prefix ( "parquet_pruning" )
886
888
. suffix ( ".parquet" )
887
889
. tempfile ( )
888
890
. expect ( "tempfile creation" ) ;
889
891
890
892
let props = WriterProperties :: builder ( )
891
- . set_max_row_group_size ( 5 )
893
+ . set_max_row_group_size ( row_per_group )
892
894
. set_bloom_filter_enabled ( true )
893
895
. build ( ) ;
894
896
@@ -906,17 +908,17 @@ async fn make_test_file_rg(scenario: Scenario) -> NamedTempFile {
906
908
output_file
907
909
}
908
910
909
- async fn make_test_file_page ( scenario : Scenario ) -> NamedTempFile {
911
+ async fn make_test_file_page ( scenario : Scenario , row_per_page : usize ) -> NamedTempFile {
910
912
let mut output_file = tempfile:: Builder :: new ( )
911
913
. prefix ( "parquet_page_pruning" )
912
914
. suffix ( ".parquet" )
913
915
. tempfile ( )
914
916
. expect ( "tempfile creation" ) ;
915
917
916
- // set row count to 5 , should get same result as rowGroup
918
+ // set both the page row-count limit and the write batch size to row_per_page; should give the same result as the row-group case
917
919
let props = WriterProperties :: builder ( )
918
- . set_data_page_row_count_limit ( 5 )
919
- . set_write_batch_size ( 5 )
920
+ . set_data_page_row_count_limit ( row_per_page )
921
+ . set_write_batch_size ( row_per_page )
920
922
. build ( ) ;
921
923
922
924
let batches = create_data_batch ( scenario) ;
0 commit comments