Skip to content

Commit 341ec35

Browse files
authored
stop panic in MetadataLoader on invalid data (#6367)
* stop panic in MetadataLoader on invalid data * better check for invalid prefetch * limit hint instead of erroring * import FOOTER_SIZE
1 parent b4de692 commit 341ec35

File tree

1 file changed

+21
-10
lines changed

1 file changed

+21
-10
lines changed

parquet/src/arrow/async_reader/metadata.rs

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ use crate::file::footer::{decode_footer, decode_metadata};
2121
use crate::file::metadata::ParquetMetaData;
2222
use crate::file::page_index::index::Index;
2323
use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index};
24+
use crate::file::FOOTER_SIZE;
2425
use bytes::Bytes;
2526
use futures::future::BoxFuture;
2627
use futures::FutureExt;
@@ -53,7 +54,7 @@ impl<F: MetadataFetch> MetadataLoader<F> {
5354
///
5455
/// See [`fetch_parquet_metadata`] for the meaning of the individual parameters
5556
pub async fn load(mut fetch: F, file_size: usize, prefetch: Option<usize>) -> Result<Self> {
56-
if file_size < 8 {
57+
if file_size < FOOTER_SIZE {
5758
return Err(ParquetError::EOF(format!(
5859
"file size of {file_size} is less than footer"
5960
)));
@@ -62,20 +63,22 @@ impl<F: MetadataFetch> MetadataLoader<F> {
6263
// If a size hint is provided, read more than the minimum size
6364
// to try and avoid a second fetch.
6465
let footer_start = if let Some(size_hint) = prefetch {
66+
// check for hint smaller than footer
67+
let size_hint = std::cmp::max(size_hint, FOOTER_SIZE);
6568
file_size.saturating_sub(size_hint)
6669
} else {
67-
file_size - 8
70+
file_size - FOOTER_SIZE
6871
};
6972

7073
let suffix = fetch.fetch(footer_start..file_size).await?;
7174
let suffix_len = suffix.len();
7275

73-
let mut footer = [0; 8];
74-
footer.copy_from_slice(&suffix[suffix_len - 8..suffix_len]);
76+
let mut footer = [0; FOOTER_SIZE];
77+
footer.copy_from_slice(&suffix[suffix_len - FOOTER_SIZE..suffix_len]);
7578

7679
let length = decode_footer(&footer)?;
7780

78-
if file_size < length + 8 {
81+
if file_size < length + FOOTER_SIZE {
7982
return Err(ParquetError::EOF(format!(
8083
"file size of {} is less than footer + metadata {}",
8184
file_size,
@@ -84,14 +87,14 @@ impl<F: MetadataFetch> MetadataLoader<F> {
8487
}
8588

8689
// Did not fetch the entire file metadata in the initial read, need to make a second request
87-
let (metadata, remainder) = if length > suffix_len - 8 {
88-
let metadata_start = file_size - length - 8;
89-
let meta = fetch.fetch(metadata_start..file_size - 8).await?;
90+
let (metadata, remainder) = if length > suffix_len - FOOTER_SIZE {
91+
let metadata_start = file_size - length - FOOTER_SIZE;
92+
let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?;
9093
(decode_metadata(&meta)?, None)
9194
} else {
92-
let metadata_start = file_size - length - 8 - footer_start;
95+
let metadata_start = file_size - length - FOOTER_SIZE - footer_start;
9396

94-
let slice = &suffix[metadata_start..suffix_len - 8];
97+
let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE];
9598
(
9699
decode_metadata(slice)?,
97100
Some((footer_start, suffix.slice(..metadata_start))),
@@ -273,6 +276,14 @@ mod tests {
273276
assert_eq!(actual.file_metadata().schema(), expected);
274277
assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
275278

279+
// Metadata hint too small - below footer size
280+
fetch_count.store(0, Ordering::SeqCst);
281+
let actual = fetch_parquet_metadata(&mut fetch, len, Some(7))
282+
.await
283+
.unwrap();
284+
assert_eq!(actual.file_metadata().schema(), expected);
285+
assert_eq!(fetch_count.load(Ordering::SeqCst), 2);
286+
276287
// Metadata hint too small
277288
fetch_count.store(0, Ordering::SeqCst);
278289
let actual = fetch_parquet_metadata(&mut fetch, len, Some(10))

0 commit comments

Comments
 (0)