@@ -475,10 +475,21 @@ mod tests {
475
475
// c1 > 5, this row group will not be included in the results.
476
476
vec ! [ ParquetStatistics :: int32( Some ( 10 ) , Some ( 20 ) , None , 0 , false ) ] ,
477
477
) ;
478
+ let rgm3 = get_row_group_meta_data (
479
+ & schema_descr,
480
+ // [1, None]
481
+ // c1 > 5, this row group cannot be filtered out, so it will be included in the results.
482
+ vec ! [ ParquetStatistics :: int32( Some ( 100 ) , None , None , 0 , false ) ] ,
483
+ ) ;
478
484
let metrics = parquet_file_metrics ( ) ;
479
485
assert_eq ! (
480
- prune_row_groups( & [ rgm1, rgm2] , None , Some ( & pruning_predicate) , & metrics) ,
481
- vec![ 0 ]
486
+ prune_row_groups(
487
+ & [ rgm1, rgm2, rgm3] ,
488
+ None ,
489
+ Some ( & pruning_predicate) ,
490
+ & metrics
491
+ ) ,
492
+ vec![ 0 , 2 ]
482
493
) ;
483
494
484
495
// INT32: c1 > 5, but parquet decimal type has different precision or scale to arrow decimal
@@ -528,15 +539,21 @@ mod tests {
528
539
// c1 > 5, this row group will not be included in the results.
529
540
vec ! [ ParquetStatistics :: int32( Some ( 0 ) , Some ( 2 ) , None , 0 , false ) ] ,
530
541
) ;
542
+ let rgm4 = get_row_group_meta_data (
543
+ & schema_descr,
544
+ // [None, 2]
545
+ // c1 > 5, this row group cannot be filtered out, so it will be included in the results.
546
+ vec ! [ ParquetStatistics :: int32( None , Some ( 2 ) , None , 0 , false ) ] ,
547
+ ) ;
531
548
let metrics = parquet_file_metrics ( ) ;
532
549
assert_eq ! (
533
550
prune_row_groups(
534
- & [ rgm1, rgm2, rgm3] ,
551
+ & [ rgm1, rgm2, rgm3, rgm4 ] ,
535
552
None ,
536
553
Some ( & pruning_predicate) ,
537
554
& metrics
538
555
) ,
539
- vec![ 0 , 1 ]
556
+ vec![ 0 , 1 , 3 ]
540
557
) ;
541
558
542
559
// INT64: c1 < 5, the c1 is decimal(18,2)
@@ -572,10 +589,20 @@ mod tests {
572
589
// [0.1, 0.2]
573
590
vec ! [ ParquetStatistics :: int64( Some ( 10 ) , Some ( 20 ) , None , 0 , false ) ] ,
574
591
) ;
592
+ let rgm3 = get_row_group_meta_data (
593
+ & schema_descr,
594
+ // [None, None]: no min/max statistics, so this row group cannot be pruned and is included in the results.
595
+ vec ! [ ParquetStatistics :: int64( None , None , None , 0 , false ) ] ,
596
+ ) ;
575
597
let metrics = parquet_file_metrics ( ) ;
576
598
assert_eq ! (
577
- prune_row_groups( & [ rgm1, rgm2] , None , Some ( & pruning_predicate) , & metrics) ,
578
- vec![ 1 ]
599
+ prune_row_groups(
600
+ & [ rgm1, rgm2, rgm3] ,
601
+ None ,
602
+ Some ( & pruning_predicate) ,
603
+ & metrics
604
+ ) ,
605
+ vec![ 1 , 2 ]
579
606
) ;
580
607
581
608
// FIXED_LENGTH_BYTE_ARRAY: c1 = decimal128(100000, 28, 3), the c1 is decimal(18,2)
@@ -631,13 +658,24 @@ mod tests {
631
658
false ,
632
659
) ] ,
633
660
) ;
661
+
662
+ let rgm3 = get_row_group_meta_data (
663
+ & schema_descr,
664
+ vec ! [ ParquetStatistics :: fixed_len_byte_array(
665
+ None , None , None , 0 , false ,
666
+ ) ] ,
667
+ ) ;
634
668
let metrics = parquet_file_metrics ( ) ;
635
669
assert_eq ! (
636
- prune_row_groups( & [ rgm1, rgm2] , None , Some ( & pruning_predicate) , & metrics) ,
637
- vec![ 1 ]
670
+ prune_row_groups(
671
+ & [ rgm1, rgm2, rgm3] ,
672
+ None ,
673
+ Some ( & pruning_predicate) ,
674
+ & metrics
675
+ ) ,
676
+ vec![ 1 , 2 ]
638
677
) ;
639
678
640
- // TODO: BYTE_ARRAY support read decimal from parquet, after the 20.0.0 arrow-rs release
641
679
// BYTE_ARRAY: c1 = decimal128(100000, 28, 3), the c1 is decimal(18,2)
642
680
// the type of parquet is decimal(18,2)
643
681
let schema =
@@ -683,10 +721,19 @@ mod tests {
683
721
false ,
684
722
) ] ,
685
723
) ;
724
+ let rgm3 = get_row_group_meta_data (
725
+ & schema_descr,
726
+ vec ! [ ParquetStatistics :: byte_array( None , None , None , 0 , false ) ] ,
727
+ ) ;
686
728
let metrics = parquet_file_metrics ( ) ;
687
729
assert_eq ! (
688
- prune_row_groups( & [ rgm1, rgm2] , None , Some ( & pruning_predicate) , & metrics) ,
689
- vec![ 1 ]
730
+ prune_row_groups(
731
+ & [ rgm1, rgm2, rgm3] ,
732
+ None ,
733
+ Some ( & pruning_predicate) ,
734
+ & metrics
735
+ ) ,
736
+ vec![ 1 , 2 ]
690
737
) ;
691
738
}
692
739
0 commit comments