@@ -40,7 +40,7 @@ use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMe
40
40
use parquet:: schema:: types:: { SchemaDescriptor , Type as ParquetType } ;
41
41
use roaring:: RoaringTreemap ;
42
42
43
- use crate :: arrow:: delete_file_manager:: DeleteFileManager ;
43
+ use crate :: arrow:: delete_file_manager:: CachingDeleteFileManager ;
44
44
use crate :: arrow:: record_batch_transformer:: RecordBatchTransformer ;
45
45
use crate :: arrow:: { arrow_schema_to_schema, get_arrow_datum} ;
46
46
use crate :: error:: Result ;
@@ -106,7 +106,11 @@ impl ArrowReaderBuilder {
106
106
pub fn build ( self ) -> ArrowReader {
107
107
ArrowReader {
108
108
batch_size : self . batch_size ,
109
- file_io : self . file_io ,
109
+ file_io : self . file_io . clone ( ) ,
110
+ delete_file_manager : CachingDeleteFileManager :: new (
111
+ self . file_io . clone ( ) ,
112
+ self . concurrency_limit_data_files ,
113
+ ) ,
110
114
concurrency_limit_data_files : self . concurrency_limit_data_files ,
111
115
row_group_filtering_enabled : self . row_group_filtering_enabled ,
112
116
row_selection_enabled : self . row_selection_enabled ,
@@ -119,6 +123,7 @@ impl ArrowReaderBuilder {
119
123
pub struct ArrowReader {
120
124
batch_size : Option < usize > ,
121
125
file_io : FileIO ,
126
+ delete_file_manager : CachingDeleteFileManager ,
122
127
123
128
/// the maximum number of data files that can be fetched at the same time
124
129
concurrency_limit_data_files : usize ,
@@ -145,9 +150,9 @@ impl ArrowReader {
145
150
task,
146
151
batch_size,
147
152
file_io,
153
+ self . delete_file_manager . clone ( ) ,
148
154
row_group_filtering_enabled,
149
155
row_selection_enabled,
150
- concurrency_limit_data_files,
151
156
)
152
157
} )
153
158
. map_err ( |err| {
@@ -163,20 +168,16 @@ impl ArrowReader {
163
168
task : FileScanTask ,
164
169
batch_size : Option < usize > ,
165
170
file_io : FileIO ,
171
+ delete_file_manager : CachingDeleteFileManager ,
166
172
row_group_filtering_enabled : bool ,
167
173
row_selection_enabled : bool ,
168
- concurrency_limit_data_files : usize ,
169
174
) -> Result < ArrowRecordBatchStream > {
170
175
let should_load_page_index =
171
176
( row_selection_enabled && task. predicate . is_some ( ) ) || !task. deletes . is_empty ( ) ;
172
177
173
178
// concurrently retrieve delete files and create RecordBatchStreamBuilder
174
- let ( delete_file_manager, mut record_batch_stream_builder) = try_join ! (
175
- DeleteFileManager :: load_deletes(
176
- task. deletes. clone( ) ,
177
- file_io. clone( ) ,
178
- concurrency_limit_data_files
179
- ) ,
179
+ let ( _, mut record_batch_stream_builder) = try_join ! (
180
+ delete_file_manager. load_deletes( task. deletes. clone( ) ) ,
180
181
Self :: create_parquet_record_batch_stream_builder(
181
182
& task. data_file_path,
182
183
file_io. clone( ) ,
0 commit comments