@@ -22,24 +22,20 @@ use crate::arrow::array_reader::ArrayReader;
22
22
use crate :: arrow:: arrow_reader:: {
23
23
ArrowPredicate , ParquetRecordBatchReader , RowSelection , RowSelector ,
24
24
} ;
25
- use crate :: errors:: ParquetError ;
25
+ use crate :: errors:: { ParquetError , Result } ;
26
26
use arrow_array:: Array ;
27
27
use arrow_select:: filter:: prep_null_mask_filter;
28
28
use std:: collections:: VecDeque ;
29
29
30
30
/// A builder for [`ReadPlan`]
31
+ ///
32
+ /// See also [`LimitedReadPlanBuilder`] for applying limits and offsets
31
33
#[ derive( Clone ) ]
32
34
pub ( crate ) struct ReadPlanBuilder {
33
35
batch_size : usize ,
34
36
/// Current selection to apply, includes all filters
35
37
selection : Option < RowSelection > ,
36
38
// TODO: Cached result of evaluating some columns with the RowSelection
37
- /// Total number of rows in the row group before the selection
38
- num_rows : Option < usize > ,
39
- /// Rows to skip before returning any rows
40
- offset : Option < usize > ,
41
- /// Limit on the number of rows to return
42
- limit : Option < usize > ,
43
39
}
44
40
45
41
impl ReadPlanBuilder {
@@ -50,9 +46,6 @@ impl ReadPlanBuilder {
50
46
Self {
51
47
batch_size,
52
48
selection : None ,
53
- num_rows : None ,
54
- offset : None ,
55
- limit : None ,
56
49
}
57
50
}
58
51
@@ -67,22 +60,14 @@ impl ReadPlanBuilder {
67
60
self . selection . as_ref ( )
68
61
}
69
62
70
- /// set the number of rows in the row group
71
- pub ( crate ) fn with_num_rows ( mut self , num_rows : usize ) -> Self {
72
- self . num_rows = Some ( num_rows) ;
73
- self
74
- }
75
-
76
- /// Set the offset to the given value
77
- pub ( crate ) fn with_offset ( mut self , offset : Option < usize > ) -> Self {
78
- self . offset = offset;
79
- self
80
- }
81
-
82
- /// Set the limit to the given value
83
- pub ( crate ) fn with_limit ( mut self , limit : Option < usize > ) -> Self {
84
- self . limit = limit;
85
- self
63
+ /// Specify the number of rows in the row group before filtering,
64
+ /// returning a [`LimitedReadPlanBuilder`] that can apply
65
+ /// offset and limit.
66
+ ///
67
+ /// Call [`LimitedReadPlanBuilder::build_limited`] to apply the offset and limit to this
68
+ /// selection.
69
+ pub ( crate ) fn limited ( self , row_count : usize ) -> LimitedReadPlanBuilder {
70
+ LimitedReadPlanBuilder :: new ( self , row_count)
86
71
}
87
72
88
73
/// Returns true if the current plan selects any rows
@@ -113,7 +98,7 @@ impl ReadPlanBuilder {
113
98
mut self ,
114
99
array_reader : Box < dyn ArrayReader > ,
115
100
predicate : & mut dyn ArrowPredicate ,
116
- ) -> crate :: errors :: Result < Self > {
101
+ ) -> Result < Self > {
117
102
let reader = ParquetRecordBatchReader :: new ( array_reader, self . clone ( ) . build ( ) ?) ;
118
103
let mut filters = vec ! [ ] ;
119
104
for maybe_batch in reader {
@@ -149,47 +134,9 @@ impl ReadPlanBuilder {
149
134
}
150
135
let Self {
151
136
batch_size,
152
- mut selection,
153
- num_rows,
154
- offset,
155
- limit,
137
+ selection,
156
138
} = self ;
157
139
158
- // TODO make this nicer somehow (maybe a different builder returned with Rowcount)
159
- if offset. is_some ( ) || limit. is_some ( ) {
160
- let Some ( row_count) = num_rows else {
161
- return Err ( general_err ! (
162
- "Internal ReadPlanBuilder::build() has limit/offset but called without a row count"
163
- ) ) ;
164
- } ;
165
-
166
- // If an offset is defined, apply it to the `selection`
167
- if let Some ( offset) = offset {
168
- selection = Some ( match row_count. checked_sub ( offset) {
169
- None => RowSelection :: from ( vec ! [ ] ) ,
170
- Some ( remaining) => selection
171
- . map ( |selection| selection. offset ( offset) )
172
- . unwrap_or_else ( || {
173
- RowSelection :: from ( vec ! [
174
- RowSelector :: skip( offset) ,
175
- RowSelector :: select( remaining) ,
176
- ] )
177
- } ) ,
178
- } ) ;
179
- }
180
-
181
- // If a limit is defined, apply it to the final `selection`
182
- if let Some ( limit) = limit {
183
- selection = Some (
184
- selection
185
- . map ( |selection| selection. limit ( limit) )
186
- . unwrap_or_else ( || {
187
- RowSelection :: from ( vec ! [ RowSelector :: select( limit. min( row_count) ) ] )
188
- } ) ,
189
- ) ;
190
- }
191
- }
192
-
193
140
let selection = selection. map ( |s| s. trim ( ) . into ( ) ) ;
194
141
195
142
Ok ( ReadPlan {
@@ -199,6 +146,90 @@ impl ReadPlanBuilder {
199
146
}
200
147
}
201
148
149
+ /// A builder for [`ReadPlan`] that applies a limit and offset to the read plan
150
+ ///
151
+ /// See [`ReadPlanBuilder::limited`]
152
+ pub ( crate ) struct LimitedReadPlanBuilder {
153
+ /// The underlying read plan builder whose selection is adjusted
154
+ inner : ReadPlanBuilder ,
155
+ /// Total number of rows in the row group before the selection, limit or
156
+ /// offset are applied
157
+ row_count : usize ,
158
+ /// The offset (number of rows to skip) to apply to the read plan, if any
159
+ offset : Option < usize > ,
160
+ /// The limit (maximum number of rows to select) to apply to the read plan, if any
161
+ limit : Option < usize > ,
162
+ }
163
+
164
+ impl LimitedReadPlanBuilder {
165
+ /// Create a new `LimitedReadPlanBuilder` from an existing builder and the row count, with no offset or limit set
166
+ fn new ( inner : ReadPlanBuilder , row_count : usize ) -> Self {
167
+ Self {
168
+ inner,
169
+ row_count,
170
+ offset : None ,
171
+ limit : None ,
172
+ }
173
+ }
174
+
175
+ /// Set the offset to apply to the read plan (`None` means no offset)
176
+ pub ( crate ) fn with_offset ( mut self , offset : Option < usize > ) -> Self {
177
+ self . offset = offset;
178
+ self
179
+ }
180
+
181
+ /// Set the limit to apply to the read plan (`None` means no limit)
182
+ pub ( crate ) fn with_limit ( mut self , limit : Option < usize > ) -> Self {
183
+ self . limit = limit;
184
+ self
185
+ }
186
+
187
+ /// Apply the offset and limit to the selection and return the underlying builder
188
+ pub ( crate ) fn build_limited ( self ) -> ReadPlanBuilder {
189
+ let Self {
190
+ mut inner,
191
+ row_count,
192
+ offset,
193
+ limit,
194
+ } = self ;
195
+
196
+ // If the plan selects no rows, replace the selection with an empty one. NOTE(review): confirm whether this step is needed
197
+ if !inner. selects_any ( ) {
198
+ inner. selection = Some ( RowSelection :: from ( vec ! [ ] ) ) ;
199
+ }
200
+
201
+ // If an offset is defined, apply it to the `selection`; an offset larger than `row_count` yields an empty selection
202
+ if let Some ( offset) = offset {
203
+ inner. selection = Some ( match row_count. checked_sub ( offset) {
204
+ None => RowSelection :: from ( vec ! [ ] ) ,
205
+ Some ( remaining) => inner
206
+ . selection
207
+ . map ( |selection| selection. offset ( offset) )
208
+ . unwrap_or_else ( || {
209
+ RowSelection :: from ( vec ! [
210
+ RowSelector :: skip( offset) ,
211
+ RowSelector :: select( remaining) ,
212
+ ] )
213
+ } ) ,
214
+ } ) ;
215
+ }
216
+
217
+ // If a limit is defined, apply it to the final `selection`; with no selection, select the first `limit.min(row_count)` rows
218
+ if let Some ( limit) = limit {
219
+ inner. selection = Some (
220
+ inner
221
+ . selection
222
+ . map ( |selection| selection. limit ( limit) )
223
+ . unwrap_or_else ( || {
224
+ RowSelection :: from ( vec ! [ RowSelector :: select( limit. min( row_count) ) ] )
225
+ } ) ,
226
+ ) ;
227
+ }
228
+
229
+ inner
230
+ }
231
+ }
232
+
202
233
/// Describes what rows to read from a Parquet Row Group
203
234
/// including based on [`RowSelection`] and [`RowFilter`].
204
235
pub ( crate ) struct ReadPlan {
0 commit comments