@@ -22,11 +22,7 @@ use arrow_schema::{DataType, Fields, SchemaBuilder};
22
22
use crate :: arrow:: array_reader:: byte_view_array:: make_byte_view_array_reader;
23
23
use crate :: arrow:: array_reader:: empty_array:: make_empty_array_reader;
24
24
use crate :: arrow:: array_reader:: fixed_len_byte_array:: make_fixed_len_byte_array_reader;
25
- use crate :: arrow:: array_reader:: {
26
- make_byte_array_dictionary_reader, make_byte_array_reader, ArrayReader ,
27
- FixedSizeListArrayReader , ListArrayReader , MapArrayReader , NullArrayReader ,
28
- PrimitiveArrayReader , RowGroups , StructArrayReader ,
29
- } ;
25
+ use crate :: arrow:: array_reader:: { make_byte_array_dictionary_reader, make_byte_array_reader, ArrayReader , CachedPredicateResult , FixedSizeListArrayReader , ListArrayReader , MapArrayReader , NullArrayReader , PrimitiveArrayReader , RowGroups , StructArrayReader } ;
30
26
use crate :: arrow:: schema:: { ParquetField , ParquetFieldType } ;
31
27
use crate :: arrow:: ProjectionMask ;
32
28
use crate :: basic:: Type as PhysicalType ;
@@ -37,12 +33,12 @@ use crate::schema::types::{ColumnDescriptor, ColumnPath, Type};
37
33
/// Builds array reader from parquet schema, projection mask, and parquet file reader.
38
34
pub ( crate ) struct ArrayReaderBuilder < ' a > {
39
35
row_groups : & ' a dyn RowGroups ,
40
- // todo add cached predicate results
36
+ cached_predicate_result : Option < & ' a CachedPredicateResult > ,
41
37
}
42
38
43
39
impl < ' a > ArrayReaderBuilder < ' a > {
44
- pub ( crate ) fn new ( row_groups : & ' a dyn RowGroups ) -> Self {
45
- Self { row_groups }
40
+ pub ( crate ) fn new ( row_groups : & ' a dyn RowGroups , cached_predicate_result : Option < & ' a CachedPredicateResult > ) -> Self {
41
+ Self { row_groups, cached_predicate_result }
46
42
}
47
43
48
44
/// Create [`ArrayReader`] from parquet schema, projection mask, and parquet file reader.
@@ -64,11 +60,16 @@ impl<'a> ArrayReaderBuilder<'a> {
64
60
self . row_groups . num_rows ( )
65
61
}
66
62
63
+
67
64
fn build_reader (
68
65
& self ,
69
66
field : & ParquetField ,
70
67
mask : & ProjectionMask ,
71
68
) -> Result < Option < Box < dyn ArrayReader > > > {
69
+ if let Some ( builder) = self . build_cached_reader ( field, mask) ? {
70
+ return Ok ( Some ( builder) ) ;
71
+ }
72
+
72
73
match field. field_type {
73
74
ParquetFieldType :: Primitive { .. } => self . build_primitive_reader ( field, mask) ,
74
75
ParquetFieldType :: Group { .. } => match & field. arrow_type {
@@ -80,6 +81,29 @@ impl<'a> ArrayReaderBuilder<'a> {
80
81
d => unimplemented ! ( "reading group type {} not implemented" , d) ,
81
82
} ,
82
83
}
84
+ }
85
+
86
+ /// Build cached array reader if the field is in the projection mask and in the cache
87
+ fn build_cached_reader (
88
+ & self ,
89
+ field : & ParquetField ,
90
+ mask : & ProjectionMask ,
91
+ ) -> Result < Option < Box < dyn ArrayReader > > > {
92
+ let Some ( cached_predicate_result) = self . cached_predicate_result else {
93
+ return Ok ( None ) ;
94
+ } ;
95
+
96
+ // TODO how to find a cached struct / list
97
+ // (Probably have to cache the individual fields)
98
+ let ParquetFieldType :: Primitive { col_idx, primitive_type : _} = & field. field_type else {
99
+ return Ok ( None ) ;
100
+ } ;
101
+
102
+ if !mask. leaf_included ( * col_idx) {
103
+ return Ok ( None ) ;
104
+ }
105
+
106
+ cached_predicate_result. build_reader ( * col_idx)
83
107
}
84
108
85
109
/// Build array reader for map type.
@@ -376,7 +400,8 @@ mod tests {
376
400
)
377
401
. unwrap ( ) ;
378
402
379
- let array_reader = ArrayReaderBuilder :: new ( & file_reader)
403
+ let cached_predicate_result = None ;
404
+ let array_reader = ArrayReaderBuilder :: new ( & file_reader, cached_predicate_result)
380
405
. build_array_reader ( fields. as_ref ( ) , & mask)
381
406
. unwrap ( ) ;
382
407
0 commit comments