Skip to content

Commit 3bd9b04

Browse files
committed
Add ParquetAccessPlan that describes which part of the parquet files to read
1 parent acd7106 commit 3bd9b04

File tree

5 files changed

+603
-223
lines changed

5 files changed

+603
-223
lines changed
Lines changed: 399 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,399 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
19+
use parquet::file::metadata::RowGroupMetaData;
20+
21+
/// A selection of rows and row groups within a ParquetFile to decode.
22+
///
23+
/// A `ParquetAccessPlan` is used to limit the row groups and data pages a `ParquetExec`
24+
/// will read and decode, thereby improving performance.
25+
///
26+
/// Note that page level pruning based on ArrowPredicate is applied after all of
27+
/// these selections
28+
///
29+
/// # Example
30+
///
31+
/// For example, given a Parquet file with 4 row groups, a `ParquetAccessPlan`
32+
/// can be used to specify skipping row group 0 and 2, scanning a range of rows
33+
/// in row group 1, and scanning all rows in row group 3 as follows:
34+
///
35+
/// ```rust
36+
/// # use parquet::arrow::arrow_reader::{RowSelection, RowSelector};
37+
/// # use datafusion::datasource::physical_plan::parquet::ParquetAccessPlan;
38+
/// // Default to scan all row groups
39+
/// let mut access_plan = ParquetAccessPlan::new_all(4);
40+
/// access_plan.skip(0); // skip row group 0
41+
/// // Use parquet reader RowSelector to specify scanning rows 100-200 and 350-400
42+
/// let row_selection = RowSelection::from(vec![
43+
/// RowSelector::skip(100),
44+
/// RowSelector::select(100),
45+
/// RowSelector::skip(150),
46+
/// RowSelector::select(50),
47+
/// ]);
48+
/// access_plan.scan_selection(1, row_selection);
49+
/// access_plan.skip(2); // skip row group 2
50+
/// // row group 3 is scanned by default
51+
/// ```
52+
///
53+
/// The resulting plan would look like:
54+
///
55+
/// ```text
56+
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
57+
///
58+
/// │ │ SKIP
59+
///
60+
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
61+
/// Row Group 0
62+
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
63+
/// ┌────────────────┐ SCAN ONLY ROWS
64+
/// │└────────────────┘ │ 100-200
65+
/// ┌────────────────┐ 350-400
66+
/// │└────────────────┘ │
67+
/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
68+
/// Row Group 1
69+
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
70+
/// SKIP
71+
/// │ │
72+
///
73+
/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
74+
/// Row Group 2
75+
/// ┌───────────────────┐
76+
/// │ │ SCAN ALL ROWS
77+
/// │ │
78+
/// │ │
79+
/// └───────────────────┘
80+
/// Row Group 3
81+
/// ```
82+
#[derive(Debug, Clone, PartialEq)]
83+
pub struct ParquetAccessPlan {
84+
/// How to access the i-th row group
85+
row_groups: Vec<RowGroupAccess>,
86+
}
87+
88+
/// Describes how the parquet reader will access a row group
89+
#[derive(Debug, Clone, PartialEq)]
90+
pub enum RowGroupAccess {
91+
/// Do not read the row group at all
92+
Skip,
93+
/// Read all rows from the row group
94+
Scan,
95+
/// Scan only the specified rows within the row group
96+
Selection(RowSelection),
97+
}
98+
99+
impl RowGroupAccess {
100+
/// Return true if this row group should be scanned
101+
pub fn should_scan(&self) -> bool {
102+
match self {
103+
RowGroupAccess::Skip => false,
104+
RowGroupAccess::Scan | RowGroupAccess::Selection(_) => true,
105+
}
106+
}
107+
}
108+
109+
impl ParquetAccessPlan {
110+
/// Create a new `ParquetAccessPlan` that scans all row groups
111+
pub fn new_all(row_group_count: usize) -> Self {
112+
Self {
113+
row_groups: vec![RowGroupAccess::Scan; row_group_count],
114+
}
115+
}
116+
117+
/// Create a new `ParquetAccessPlan` that scans no row groups
118+
pub fn new_none(row_group_count: usize) -> Self {
119+
Self {
120+
row_groups: vec![RowGroupAccess::Skip; row_group_count],
121+
}
122+
}
123+
124+
/// Create a new `ParquetAccessPlan` from the specified [`RowGroupAccess`]es
125+
pub fn new(row_groups: Vec<RowGroupAccess>) -> Self {
126+
Self { row_groups }
127+
}
128+
129+
/// Set the i-th row group to the specified [`RowGroupAccess`]
130+
pub fn set(&mut self, idx: usize, access: RowGroupAccess) {
131+
self.row_groups[idx] = access;
132+
}
133+
134+
/// skips the i-th row group (should not be scanned)
135+
pub fn skip(&mut self, idx: usize) {
136+
self.set(idx, RowGroupAccess::Skip);
137+
}
138+
139+
/// Return true if the i-th row group should be scanned
140+
pub fn should_scan(&self, idx: usize) -> bool {
141+
self.row_groups[idx].should_scan()
142+
}
143+
144+
/// Set to scan only the [`RowSelection`] in the specified row group.
145+
///
146+
/// Behavior is different depending on the existing access
147+
/// * [`RowGroupAccess::Skip`]: does nothing
148+
/// * [`RowGroupAccess::Scan`]: Updates to scan only the rows in the `RowSelection`
149+
/// * [`RowGroupAccess::Selection`]: Updates to scan only the intersection of the existing selection and the new selection
150+
pub fn scan_selection(&mut self, idx: usize, selection: RowSelection) {
151+
self.row_groups[idx] = match &self.row_groups[idx] {
152+
// already skipping the entire row group
153+
RowGroupAccess::Skip => RowGroupAccess::Skip,
154+
RowGroupAccess::Scan => RowGroupAccess::Selection(selection),
155+
RowGroupAccess::Selection(existing_selection) => {
156+
RowGroupAccess::Selection(existing_selection.intersection(&selection))
157+
}
158+
}
159+
}
160+
161+
/// Return the overall `RowSelection` for all scanned row groups
162+
///
163+
/// This is used to compute the row selection for the parquet reader. See
164+
/// [`ArrowReaderBuilder::with_row_selection`] for more details.
165+
///
166+
/// Returns
167+
/// * `None` if there are no [`RowGroupAccess::Selection`]
168+
/// * `Some(selection)` if there are [`RowGroupAccess::Selection`]s
169+
///
170+
/// The returned selection represents which rows to scan across any row
171+
/// row groups which are not skipped.
172+
///
173+
/// # Example
174+
///
175+
/// Given an access plan like this:
176+
///
177+
/// ```text
178+
/// Scan (scan all row group 0)
179+
/// Skip (skip row group 1)
180+
/// Select 50-100 (scan rows 50-100 in row group 2)
181+
/// ```
182+
///
183+
/// Assuming each row group has 1000 rows, the resulting row selection would
184+
/// be the rows to scan in row group 0 and 2:
185+
///
186+
/// ```text
187+
/// Select 1000 (scan all rows in row group 0)
188+
/// Select 50-100 (scan rows 50-100 in row group 2)
189+
/// ```
190+
///
191+
/// Note there is no entry for the (entirely) skipped row group 1.
192+
///
193+
/// [`ArrowReaderBuilder::with_row_selection`]: parquet::arrow::arrow_reader::ArrowReaderBuilder::with_row_selection
194+
pub fn into_overall_row_selection(
195+
self,
196+
row_group_meta_data: &[RowGroupMetaData],
197+
) -> Option<RowSelection> {
198+
assert_eq!(row_group_meta_data.len(), self.row_groups.len());
199+
if !self
200+
.row_groups
201+
.iter()
202+
.any(|rg| matches!(rg, RowGroupAccess::Selection(_)))
203+
{
204+
return None;
205+
}
206+
207+
let total_selection: RowSelection = self
208+
.row_groups
209+
.into_iter()
210+
.zip(row_group_meta_data.iter())
211+
.flat_map(|(rg, rg_meta)| {
212+
match rg {
213+
RowGroupAccess::Skip => vec![],
214+
RowGroupAccess::Scan => {
215+
// need a row group access to scan the entire row group (need row group counts)
216+
vec![RowSelector::select(rg_meta.num_rows() as usize)]
217+
}
218+
RowGroupAccess::Selection(selection) => {
219+
let selection: Vec<RowSelector> = selection.into();
220+
selection
221+
}
222+
}
223+
})
224+
.collect();
225+
226+
Some(total_selection)
227+
}
228+
229+
/// Return an iterator over the row group indexes that should be scanned
230+
pub fn row_group_index_iter(&self) -> impl Iterator<Item = usize> + '_ {
231+
self.row_groups.iter().enumerate().filter_map(|(idx, b)| {
232+
if b.should_scan() {
233+
Some(idx)
234+
} else {
235+
None
236+
}
237+
})
238+
}
239+
240+
/// Return a vec of all row group indexes to scan
241+
pub fn row_group_indexes(&self) -> Vec<usize> {
242+
self.row_group_index_iter().collect()
243+
}
244+
245+
/// Return the total number of row groups (not the total number or groups to
246+
/// scan)
247+
pub fn len(&self) -> usize {
248+
self.row_groups.len()
249+
}
250+
251+
/// Return true if there are no row groups
252+
pub fn is_empty(&self) -> bool {
253+
self.row_groups.is_empty()
254+
}
255+
256+
/// Get a reference to the inner accesses
257+
pub fn inner(&self) -> &[RowGroupAccess] {
258+
&self.row_groups
259+
}
260+
261+
/// Covert into the inner row group accesses
262+
pub fn into_inner(self) -> Vec<RowGroupAccess> {
263+
self.row_groups
264+
}
265+
}
266+
267+
#[cfg(test)]
mod test {
    use super::*;
    use parquet::basic::LogicalType;
    use parquet::file::metadata::ColumnChunkMetaData;
    use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor};
    use std::sync::{Arc, OnceLock};

    #[test]
    fn test_overall_row_selection_only_scans() {
        // no `Selection` entries, so no overall selection is produced
        let accesses = vec![RowGroupAccess::Scan; 4];
        assert_eq!(overall_row_selection(accesses), None);
    }

    #[test]
    fn test_overall_row_selection_only_skips() {
        // no `Selection` entries, so no overall selection is produced
        let accesses = vec![RowGroupAccess::Skip; 4];
        assert_eq!(overall_row_selection(accesses), None);
    }

    #[test]
    fn test_overall_row_selection_mixed_1() {
        let accesses = vec![
            RowGroupAccess::Scan,
            select_5_skip_7(),
            RowGroupAccess::Skip,
            RowGroupAccess::Skip,
        ];

        let expected: RowSelection = vec![
            // select the entire first row group
            RowSelector::select(10),
            // selectors from the second row group
            RowSelector::select(5),
            RowSelector::skip(7),
        ]
        .into();

        assert_eq!(overall_row_selection(accesses), Some(expected));
    }

    #[test]
    fn test_overall_row_selection_mixed_2() {
        let accesses = vec![
            RowGroupAccess::Skip,
            RowGroupAccess::Scan,
            select_5_skip_7(),
            RowGroupAccess::Scan,
        ];

        let expected: RowSelection = vec![
            // select the entire second row group
            RowSelector::select(20),
            // selectors from the third row group
            RowSelector::select(5),
            RowSelector::skip(7),
            // select the entire fourth row group
            RowSelector::select(40),
        ]
        .into();

        assert_eq!(overall_row_selection(accesses), Some(expected));
    }

    /// A [`RowGroupAccess::Selection`] that selects 5 rows and then skips 7
    fn select_5_skip_7() -> RowGroupAccess {
        let selectors = vec![RowSelector::select(5), RowSelector::skip(7)];
        RowGroupAccess::Selection(selectors.into())
    }

    /// Builds a [`ParquetAccessPlan`] from the given accesses and returns its
    /// overall row selection against the shared test metadata
    fn overall_row_selection(
        row_group_access: Vec<RowGroupAccess>,
    ) -> Option<RowSelection> {
        ParquetAccessPlan::new(row_group_access)
            .into_overall_row_selection(row_group_metadata())
    }

    static ROW_GROUP_METADATA: OnceLock<Vec<RowGroupMetaData>> = OnceLock::new();

    /// Lazily built [`RowGroupMetaData`] for 4 row groups with 10, 20, 30 and
    /// 40 rows respectively
    fn row_group_metadata() -> &'static [RowGroupMetaData] {
        ROW_GROUP_METADATA.get_or_init(|| {
            let schema_descr = get_test_schema_descr();

            let mut row_groups = Vec::new();
            for num_rows in [10, 20, 30, 40] {
                let column = ColumnChunkMetaData::builder(schema_descr.column(0))
                    .set_num_values(num_rows)
                    .build()
                    .unwrap();

                let row_group = RowGroupMetaData::builder(schema_descr.clone())
                    .set_num_rows(num_rows)
                    .set_column_metadata(vec![column])
                    .build()
                    .unwrap();
                row_groups.push(row_group);
            }
            row_groups
        })
    }

    /// Schema with exactly one column, named "a", of type `BYTE_ARRAY`/`String`
    fn get_test_schema_descr() -> SchemaDescPtr {
        use parquet::basic::Type as PhysicalType;
        use parquet::schema::types::Type as SchemaType;

        let column_a = SchemaType::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
            .with_logical_type(Some(LogicalType::String))
            .build()
            .unwrap();
        let root = SchemaType::group_type_builder("schema")
            .with_fields(vec![Arc::new(column_a)])
            .build()
            .unwrap();
        Arc::new(SchemaDescriptor::new(Arc::new(root)))
    }
}

0 commit comments

Comments
 (0)