|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; |
| 19 | +use parquet::file::metadata::RowGroupMetaData; |
| 20 | + |
| 21 | +/// A selection of rows and row groups within a ParquetFile to decode. |
| 22 | +/// |
| 23 | +/// A `ParquetAccessPlan` is used to limit the row groups and data pages a `ParquetExec` |
| 24 | +/// will read and decode, and thus improve performance. |
| 25 | +/// |
| 26 | +/// Note that page-level pruning based on ArrowPredicate is applied after all of |
| 27 | +/// these selections. |
| 28 | +/// |
| 29 | +/// # Example |
| 30 | +/// |
| 31 | +/// For example, given a Parquet file with 4 row groups, a `ParquetAccessPlan` |
| 32 | +/// can be used to specify skipping row groups 0 and 2, scanning a range of rows |
| 33 | +/// in row group 1, and scanning all rows in row group 3 as follows: |
| 34 | +/// |
| 35 | +/// ```rust |
| 36 | +/// # use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; |
| 37 | +/// # use datafusion::datasource::physical_plan::parquet::ParquetAccessPlan; |
| 38 | +/// // Default to scan all row groups |
| 39 | +/// let mut access_plan = ParquetAccessPlan::new_all(4); |
| 40 | +/// access_plan.skip(0); // skip row group 0 |
| 41 | +/// // Use parquet reader RowSelector to specify scanning rows 100-200 and 350-400 |
| 42 | +/// let row_selection = RowSelection::from(vec![ |
| 43 | +/// RowSelector::skip(100), |
| 44 | +/// RowSelector::select(100), |
| 45 | +/// RowSelector::skip(150), |
| 46 | +/// RowSelector::select(50), |
| 47 | +/// ]); |
| 48 | +/// access_plan.scan_selection(1, row_selection); |
| 49 | +/// access_plan.skip(2); // skip row group 2 |
| 50 | +/// // row group 3 is scanned by default |
| 51 | +/// ``` |
| 52 | +/// |
| 53 | +/// The resulting plan would look like: |
| 54 | +/// |
| 55 | +/// ```text |
| 56 | +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ |
| 57 | +/// |
| 58 | +/// │ │ SKIP |
| 59 | +/// |
| 60 | +/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ |
| 61 | +/// Row Group 0 |
| 62 | +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ |
| 63 | +/// ┌────────────────┐ SCAN ONLY ROWS |
| 64 | +/// │└────────────────┘ │ 100-200 |
| 65 | +/// ┌────────────────┐ 350-400 |
| 66 | +/// │└────────────────┘ │ |
| 67 | +/// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
| 68 | +/// Row Group 1 |
| 69 | +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ |
| 70 | +/// SKIP |
| 71 | +/// │ │ |
| 72 | +/// |
| 73 | +/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ |
| 74 | +/// Row Group 2 |
| 75 | +/// ┌───────────────────┐ |
| 76 | +/// │ │ SCAN ALL ROWS |
| 77 | +/// │ │ |
| 78 | +/// │ │ |
| 79 | +/// └───────────────────┘ |
| 80 | +/// Row Group 3 |
| 81 | +/// ``` |
| 82 | +#[derive(Debug, Clone, PartialEq)] |
| 83 | +pub struct ParquetAccessPlan { |
| 84 | + /// One [`RowGroupAccess`] per row group: entry `i` describes how to access the i-th row group |
| 85 | + row_groups: Vec<RowGroupAccess>, |
| 86 | +} |
| 87 | + |
| 88 | +/// Describes how the parquet reader will access a single row group: not at all, entirely, or partially |
| 89 | +#[derive(Debug, Clone, PartialEq)] |
| 90 | +pub enum RowGroupAccess { |
| 91 | + /// Do not read the row group at all |
| 92 | + Skip, |
| 93 | + /// Read all rows from the row group |
| 94 | + Scan, |
| 95 | + /// Scan only the rows within the row group that the contained [`RowSelection`] selects |
| 96 | + Selection(RowSelection), |
| 97 | +} |
| 98 | + |
| 99 | +impl RowGroupAccess { |
| 100 | + /// Return true if any part of this row group should be scanned: true for [`RowGroupAccess::Scan`] and [`RowGroupAccess::Selection`], false only for [`RowGroupAccess::Skip`] |
| 101 | + pub fn should_scan(&self) -> bool { |
| 102 | + match self { |
| 103 | + RowGroupAccess::Skip => false, |
| 104 | + RowGroupAccess::Scan | RowGroupAccess::Selection(_) => true, |
| 105 | + } |
| 106 | + } |
| 107 | +} |
| 108 | + |
| 109 | +impl ParquetAccessPlan { |
| 110 | + /// Create a new `ParquetAccessPlan` with `row_group_count` row groups, all marked [`RowGroupAccess::Scan`] |
| 111 | + pub fn new_all(row_group_count: usize) -> Self { |
| 112 | + Self { |
| 113 | + row_groups: vec![RowGroupAccess::Scan; row_group_count], |
| 114 | + } |
| 115 | + } |
| 116 | + |
| 117 | + /// Create a new `ParquetAccessPlan` with `row_group_count` row groups, all marked [`RowGroupAccess::Skip`] |
| 118 | + pub fn new_none(row_group_count: usize) -> Self { |
| 119 | + Self { |
| 120 | + row_groups: vec![RowGroupAccess::Skip; row_group_count], |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + /// Create a new `ParquetAccessPlan` from the specified [`RowGroupAccess`]es; entry `i` applies to the i-th row group |
| 125 | + pub fn new(row_groups: Vec<RowGroupAccess>) -> Self { |
| 126 | + Self { row_groups } |
| 127 | + } |
| 128 | + |
| 129 | + /// Set the i-th row group to the specified [`RowGroupAccess`], replacing whatever was there. Panics if `idx` is out of bounds. |
| 130 | + pub fn set(&mut self, idx: usize, access: RowGroupAccess) { |
| 131 | + self.row_groups[idx] = access; |
| 132 | + } |
| 133 | + |
| 134 | + /// Mark the i-th row group as skipped (it will not be scanned). Panics if `idx` is out of bounds. |
| 135 | + pub fn skip(&mut self, idx: usize) { |
| 136 | + self.set(idx, RowGroupAccess::Skip); |
| 137 | + } |
| 138 | + |
| 139 | + /// Return true if the i-th row group should be scanned, fully or partially. Panics if `idx` is out of bounds. |
| 140 | + pub fn should_scan(&self, idx: usize) -> bool { |
| 141 | + self.row_groups[idx].should_scan() |
| 142 | + } |
| 143 | + |
| 144 | + /// Restrict the row group at `idx` to scan only the rows in the given [`RowSelection`]. Panics if `idx` is out of bounds. |
| 145 | + /// |
| 146 | + /// The result depends on the access currently recorded for that row group: |
| 147 | + /// * [`RowGroupAccess::Skip`]: stays skipped (the new selection is ignored) |
| 148 | + /// * [`RowGroupAccess::Scan`]: Updates to scan only the rows in the `RowSelection` |
| 149 | + /// * [`RowGroupAccess::Selection`]: Updates to scan only the intersection of the existing selection and the new selection |
| 150 | + pub fn scan_selection(&mut self, idx: usize, selection: RowSelection) { |
| 151 | + self.row_groups[idx] = match &self.row_groups[idx] { |
| 152 | + // already skipping the entire row group, so the new selection cannot add rows back |
| 153 | + RowGroupAccess::Skip => RowGroupAccess::Skip, |
| 154 | + RowGroupAccess::Scan => RowGroupAccess::Selection(selection), |
| 155 | + RowGroupAccess::Selection(existing_selection) => { |
| 156 | + RowGroupAccess::Selection(existing_selection.intersection(&selection)) |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + /// Consume the plan and return the overall `RowSelection` for all scanned row groups |
| 162 | + /// |
| 163 | + /// This is used to compute the row selection for the parquet reader. See |
| 164 | + /// [`ArrowReaderBuilder::with_row_selection`] for more details. |
| 165 | + /// |
| 166 | + /// Returns: |
| 167 | + /// * `None` if there are no [`RowGroupAccess::Selection`]s (only scans and/or skips) |
| 168 | + /// * `Some(selection)` if there is at least one [`RowGroupAccess::Selection`] |
| 169 | + /// |
| 170 | + /// The returned selection represents which rows to scan across the |
| 171 | + /// row groups which are not skipped. Panics if `row_group_meta_data` does not have exactly one entry per row group in this plan. |
| 172 | + /// |
| 173 | + /// # Example |
| 174 | + /// |
| 175 | + /// Given an access plan like this: |
| 176 | + /// |
| 177 | + /// ```text |
| 178 | + /// Scan (scan all row group 0) |
| 179 | + /// Skip (skip row group 1) |
| 180 | + /// Select 50-100 (scan rows 50-100 in row group 2) |
| 181 | + /// ``` |
| 182 | + /// |
| 183 | + /// Assuming each row group has 1000 rows, the resulting row selection would |
| 184 | + /// be the rows to scan in row group 0 and 2: |
| 185 | + /// |
| 186 | + /// ```text |
| 187 | + /// Select 1000 (scan all rows in row group 0) |
| 188 | + /// Select 50-100 (scan rows 50-100 in row group 2) |
| 189 | + /// ``` |
| 190 | + /// |
| 191 | + /// Note there is no entry for the (entirely) skipped row group 1. |
| 192 | + /// |
| 193 | + /// [`ArrowReaderBuilder::with_row_selection`]: parquet::arrow::arrow_reader::ArrowReaderBuilder::with_row_selection |
| 194 | + pub fn into_overall_row_selection( |
| 195 | + self, |
| 196 | + row_group_meta_data: &[RowGroupMetaData], |
| 197 | + ) -> Option<RowSelection> { |
| 198 | + assert_eq!(row_group_meta_data.len(), self.row_groups.len()); |
| 199 | + if !self |
| 200 | + .row_groups |
| 201 | + .iter() |
| 202 | + .any(|rg| matches!(rg, RowGroupAccess::Selection(_))) |
| 203 | + { |
| 204 | + return None; |
| 205 | + } |
| 206 | + |
| 207 | + let total_selection: RowSelection = self |
| 208 | + .row_groups |
| 209 | + .into_iter() |
| 210 | + .zip(row_group_meta_data.iter()) |
| 211 | + .flat_map(|(rg, rg_meta)| { |
| 212 | + match rg { |
| 213 | + RowGroupAccess::Skip => vec![], |
| 214 | + RowGroupAccess::Scan => { |
| 215 | + // a full scan carries no selectors of its own, so select every row using the row count from the metadata |
| 216 | + vec![RowSelector::select(rg_meta.num_rows() as usize)] |
| 217 | + } |
| 218 | + RowGroupAccess::Selection(selection) => { |
| 219 | + let selection: Vec<RowSelector> = selection.into(); |
| 220 | + selection |
| 221 | + } |
| 222 | + } |
| 223 | + }) |
| 224 | + .collect(); |
| 225 | + |
| 226 | + Some(total_selection) |
| 227 | + } |
| 228 | + |
| 229 | + /// Return an iterator over the indexes of the row groups that should be scanned, in ascending order |
| 230 | + pub fn row_group_index_iter(&self) -> impl Iterator<Item = usize> + '_ { |
| 231 | + self.row_groups.iter().enumerate().filter_map(|(idx, b)| { |
| 232 | + if b.should_scan() { |
| 233 | + Some(idx) |
| 234 | + } else { |
| 235 | + None |
| 236 | + } |
| 237 | + }) |
| 238 | + } |
| 239 | + |
| 240 | + /// Return a vec of all row group indexes to scan (collects `row_group_index_iter`) |
| 241 | + pub fn row_group_indexes(&self) -> Vec<usize> { |
| 242 | + self.row_group_index_iter().collect() |
| 243 | + } |
| 244 | + |
| 245 | + /// Return the total number of row groups (not the total number of groups to |
| 246 | + /// scan) |
| 247 | + pub fn len(&self) -> usize { |
| 248 | + self.row_groups.len() |
| 249 | + } |
| 250 | + |
| 251 | + /// Return true if the plan contains no row groups at all (skipped or not) |
| 252 | + pub fn is_empty(&self) -> bool { |
| 253 | + self.row_groups.is_empty() |
| 254 | + } |
| 255 | + |
| 256 | + /// Get a reference to the inner accesses, one per row group |
| 257 | + pub fn inner(&self) -> &[RowGroupAccess] { |
| 258 | + &self.row_groups |
| 259 | + } |
| 260 | + |
| 261 | + /// Convert into the inner row group accesses, consuming the plan |
| 262 | + pub fn into_inner(self) -> Vec<RowGroupAccess> { |
| 263 | + self.row_groups |
| 264 | + } |
| 265 | +} |
| 266 | + |
| 267 | +#[cfg(test)] |
| 268 | +mod test { |
| 269 | + use super::*; |
| 270 | + use parquet::basic::LogicalType; |
| 271 | + use parquet::file::metadata::ColumnChunkMetaData; |
| 272 | + use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor}; |
| 273 | + use std::sync::{Arc, OnceLock}; |
| 274 | + |
| 275 | + #[test] |
| 276 | + fn test_overall_row_selection_only_scans() { |
| 277 | + assert_eq!( |
| 278 | + overall_row_selection(vec![ |
| 279 | + RowGroupAccess::Scan, |
| 280 | + RowGroupAccess::Scan, |
| 281 | + RowGroupAccess::Scan, |
| 282 | + RowGroupAccess::Scan, |
| 283 | + ]), |
| 284 | + None |
| 285 | + ); |
| 286 | + } |
| 287 | + |
| 288 | + #[test] |
| 289 | + fn test_overall_row_selection_only_skips() { |
| 290 | + assert_eq!( |
| 291 | + overall_row_selection(vec![ |
| 292 | + RowGroupAccess::Skip, |
| 293 | + RowGroupAccess::Skip, |
| 294 | + RowGroupAccess::Skip, |
| 295 | + RowGroupAccess::Skip, |
| 296 | + ]), |
| 297 | + None |
| 298 | + ); |
| 299 | + } |
| 300 | + #[test] |
| 301 | + fn test_overall_row_selection_mixed_1() { |
| 302 | + assert_eq!( |
| 303 | + overall_row_selection(vec![ |
| 304 | + RowGroupAccess::Scan, |
| 305 | + RowGroupAccess::Selection( |
| 306 | + vec![RowSelector::select(5), RowSelector::skip(7)].into() |
| 307 | + ), |
| 308 | + RowGroupAccess::Skip, |
| 309 | + RowGroupAccess::Skip, |
| 310 | + ]), |
| 311 | + Some( |
| 312 | + vec![ |
| 313 | + // select the entire first row group (10 rows in the test metadata) |
| 314 | + RowSelector::select(10), |
| 315 | + // selectors from the second row group, passed through unchanged |
| 316 | + RowSelector::select(5), |
| 317 | + RowSelector::skip(7) |
| 318 | + ] |
| 319 | + .into() |
| 320 | + ) |
| 321 | + ); |
| 322 | + } |
| 323 | + |
| 324 | + #[test] |
| 325 | + fn test_overall_row_selection_mixed_2() { |
| 326 | + assert_eq!( |
| 327 | + overall_row_selection(vec![ |
| 328 | + RowGroupAccess::Skip, |
| 329 | + RowGroupAccess::Scan, |
| 330 | + RowGroupAccess::Selection( |
| 331 | + vec![RowSelector::select(5), RowSelector::skip(7)].into() |
| 332 | + ), |
| 333 | + RowGroupAccess::Scan, |
| 334 | + ]), |
| 335 | + Some( |
| 336 | + vec![ |
| 337 | + // select the entire second row group (20 rows); the skipped first group contributes nothing |
| 338 | + RowSelector::select(20), |
| 339 | + // selectors from the third row group, passed through unchanged |
| 340 | + RowSelector::select(5), |
| 341 | + RowSelector::skip(7), |
| 342 | + // select the entire fourth row group (40 rows) |
| 343 | + RowSelector::select(40), |
| 344 | + ] |
| 345 | + .into() |
| 346 | + ) |
| 347 | + ); |
| 348 | + } |
| 349 | + |
| 350 | + /// Computes the overall row selection for the given row group access list, using the 4 row groups from `row_group_metadata` |
| 351 | + fn overall_row_selection( |
| 352 | + row_group_access: Vec<RowGroupAccess>, |
| 353 | + ) -> Option<RowSelection> { |
| 354 | + let access_plan = ParquetAccessPlan::new(row_group_access); |
| 355 | + access_plan.into_overall_row_selection(row_group_metadata()) |
| 356 | + } |
| 357 | + |
| 358 | + static ROW_GROUP_METADATA: OnceLock<Vec<RowGroupMetaData>> = OnceLock::new(); |
| 359 | + |
| 360 | + /// [`RowGroupMetaData`] for 4 row groups with 10, 20, 30, 40 rows |
| 361 | + /// respectively, built once and cached in `ROW_GROUP_METADATA` |
| 362 | + fn row_group_metadata() -> &'static [RowGroupMetaData] { |
| 363 | + ROW_GROUP_METADATA.get_or_init(|| { |
| 364 | + let schema_descr = get_test_schema_descr(); |
| 365 | + let row_counts = [10, 20, 30, 40]; |
| 366 | + |
| 367 | + row_counts |
| 368 | + .into_iter() |
| 369 | + .map(|num_rows| { |
| 370 | + let column = ColumnChunkMetaData::builder(schema_descr.column(0)) |
| 371 | + .set_num_values(num_rows) |
| 372 | + .build() |
| 373 | + .unwrap(); |
| 374 | + |
| 375 | + RowGroupMetaData::builder(schema_descr.clone()) |
| 376 | + .set_num_rows(num_rows) |
| 377 | + .set_column_metadata(vec![column]) |
| 378 | + .build() |
| 379 | + .unwrap() |
| 380 | + }) |
| 381 | + .collect() |
| 382 | + }) |
| 383 | + } |
| 384 | + |
| 385 | + /// Schema with a single column named "a" of type `BYTE_ARRAY`/`String` |
| 386 | + fn get_test_schema_descr() -> SchemaDescPtr { |
| 387 | + use parquet::basic::Type as PhysicalType; |
| 388 | + use parquet::schema::types::Type as SchemaType; |
| 389 | + let field = SchemaType::primitive_type_builder("a", PhysicalType::BYTE_ARRAY) |
| 390 | + .with_logical_type(Some(LogicalType::String)) |
| 391 | + .build() |
| 392 | + .unwrap(); |
| 393 | + let schema = SchemaType::group_type_builder("schema") |
| 394 | + .with_fields(vec![Arc::new(field)]) |
| 395 | + .build() |
| 396 | + .unwrap(); |
| 397 | + Arc::new(SchemaDescriptor::new(Arc::new(schema))) |
| 398 | + } |
| 399 | +} |
0 commit comments