Split out integration test plumbing (#2594) (#2300) #2598


Merged 3 commits on Aug 28, 2022
2 changes: 2 additions & 0 deletions .github/workflows/arrow.yml
@@ -64,6 +64,8 @@ jobs:
          cargo run --example dynamic_types
          cargo run --example read_csv
          cargo run --example read_csv_infer_schema
      - name: Run non-archery based integration-tests
        run: cargo test -p arrow-integration-testing

  # test compilation features
  linux-features:
1 change: 0 additions & 1 deletion arrow/Cargo.toml
@@ -61,7 +61,6 @@ packed_simd = { version = "0.3", default-features = false, optional = true, pack
chrono = { version = "0.4", default-features = false, features = ["clock"] }
chrono-tz = { version = "0.6", default-features = false, optional = true }
flatbuffers = { version = "2.1.2", default-features = false, features = ["thiserror"], optional = true }
hex = { version = "0.4", default-features = false, features = ["std"] }
comfy-table = { version = "6.0", optional = true, default-features = false }
pyo3 = { version = "0.17", default-features = false, optional = true }
lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] }
346 changes: 1 addition & 345 deletions arrow/src/ipc/reader.rs
@@ -1173,336 +1173,8 @@ mod tests {

    use std::fs::File;

    use flate2::read::GzDecoder;

    use crate::datatypes;
    use crate::datatypes::{ArrowNativeType, Float64Type, Int32Type, Int8Type};
    use crate::{datatypes, util::integration_util::*};

    #[test]
    #[cfg(not(feature = "force_validate"))]
    fn read_generated_files_014() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "0.14.1";
        // the test is repetitive, thus we can read all supported files at once
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_dictionary",
            "generated_map",
            "generated_nested",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
            "generated_decimal",
        ];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = FileReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
        });
    }

    #[test]
    #[should_panic(expected = "Big Endian is not supported for Decimal!")]
    fn read_decimal_be_file_should_panic() {
        let testdata = crate::util::test_util::arrow_test_data();
        let file = File::open(format!(
            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_decimal.arrow_file",
            testdata
        ))
        .unwrap();
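        // FileReader::try_new reads the file footer and converts the schema
        // eagerly; that conversion is what triggers the expected panic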
        FileReader::try_new(file, None).unwrap();
    }

    #[test]
    #[should_panic(
        expected = "Last offset 687865856 of Utf8 is larger than values length 41"
    )]
    fn read_dictionary_be_not_implemented() {
        // The offsets are not translated for big-endian files
        // https://github.com/apache/arrow-rs/issues/859
        let testdata = crate::util::test_util::arrow_test_data();
        let file = File::open(format!(
            "{}/arrow-ipc-stream/integration/1.0.0-bigendian/generated_dictionary.arrow_file",
            testdata
        ))
        .unwrap();
        FileReader::try_new(file, None).unwrap();
    }
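
The odd-looking 687865856 in the expected panic message above is just the values length 41 with its bytes swapped: the little-endian encoding of 41u32 reinterpreted as big-endian is 41 << 24. A standalone check of that arithmetic (an illustrative snippet, not part of the diff):

    // [41, 0, 0, 0] read as big-endian is 0x2900_0000 = 687_865_856
    assert_eq!(u32::from_be_bytes(41u32.to_le_bytes()), 687_865_856);
    assert_eq!(41u32 << 24, 687_865_856);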

    #[test]
    fn read_generated_be_files_should_work() {
        // complementary to the previous test
        let testdata = crate::util::test_util::arrow_test_data();
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_map",
            "generated_nested",
            "generated_null_trivial",
            "generated_null",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
        ];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/1.0.0-bigendian/{}.arrow_file",
                testdata, path
            ))
            .unwrap();

            FileReader::try_new(file, None).unwrap();
        });
    }

    #[test]
    fn projection_should_work() {
        // complementary to the previous test
        let testdata = crate::util::test_util::arrow_test_data();
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_map",
            "generated_nested",
            "generated_null_trivial",
            "generated_null",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
        ];
        paths.iter().for_each(|path| {
            // We must use little-endian files here.
            // The offsets are not translated for big-endian files
            // https://github.com/apache/arrow-rs/issues/859
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/1.0.0-littleendian/{}.arrow_file",
                testdata, path
            ))
            .unwrap();

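            // project only the first column (index 0)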
            let reader = FileReader::try_new(file, Some(vec![0])).unwrap();
            let datatype_0 = reader.schema().fields()[0].data_type().clone();
            reader.for_each(|batch| {
                let batch = batch.unwrap();
                assert_eq!(batch.columns().len(), 1);
                assert_eq!(datatype_0, batch.schema().fields()[0].data_type().clone());
            });
        });
    }

    #[test]
    #[cfg(not(feature = "force_validate"))]
    fn read_generated_streams_014() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "0.14.1";
        // the test is repetitive, thus we can read all supported files at once
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_dictionary",
            "generated_map",
            "generated_nested",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
            "generated_decimal",
        ];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.stream",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = StreamReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
            // the next batch must be empty
            assert!(reader.next().is_none());
            // the stream must indicate that it's finished
            assert!(reader.is_finished());
        });
    }

    #[test]
    fn read_generated_files_100() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "1.0.0-littleendian";
        // the test is repetitive, thus we can read all supported files at once
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_dictionary",
            "generated_map",
            // "generated_map_non_canonical",
            "generated_nested",
            "generated_null_trivial",
            "generated_null",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
        ];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = FileReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
        });
    }

    #[test]
    fn read_generated_streams_100() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "1.0.0-littleendian";
        // the test is repetitive, thus we can read all supported files at once
        let paths = vec![
            "generated_interval",
            "generated_datetime",
            "generated_dictionary",
            "generated_map",
            // "generated_map_non_canonical",
            "generated_nested",
            "generated_null_trivial",
            "generated_null",
            "generated_primitive_no_batches",
            "generated_primitive_zerolength",
            "generated_primitive",
        ];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.stream",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = StreamReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
            // the next batch must be empty
            assert!(reader.next().is_none());
            // the stream must indicate that it's finished
            assert!(reader.is_finished());
        });
    }
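
The trailing assertions in the stream tests above capture StreamReader's end-of-stream contract: once the comparison has drained all batches, next() returns None and is_finished() reports true. A standalone reader built on that contract could look like the following sketch (illustrative only; read_all and its path argument are not part of this PR):

    use arrow::error::Result;
    use arrow::ipc::reader::StreamReader;
    use arrow::record_batch::RecordBatch;
    use std::fs::File;

    /// Collect every batch from an Arrow IPC stream file.
    fn read_all(path: &str) -> Result<Vec<RecordBatch>> {
        let file = File::open(path)?;
        let reader = StreamReader::try_new(file, None)?;
        // StreamReader is an Iterator<Item = Result<RecordBatch>>; it yields
        // None after the end-of-stream marker, so collect() stops there.
        reader.collect()
    }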

    #[test]
    #[cfg(feature = "ipc_compression")]
    fn read_generated_streams_200() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "2.0.0-compression";

        // the test is repetitive, thus we can read all supported files at once
        let paths = vec!["generated_lz4", "generated_zstd"];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.stream",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = StreamReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
            // the next batch must be empty
            assert!(reader.next().is_none());
            // the stream must indicate that it's finished
            assert!(reader.is_finished());
        });
    }

    #[test]
    #[cfg(not(feature = "ipc_compression"))]
    fn read_generated_streams_200_negative() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "2.0.0-compression";

        // the test is repetitive, thus we can read all supported files at once
        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
        cases.iter().for_each(|(path, compression_name)| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.stream",
                testdata, version, path
            ))
            .unwrap();

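            // try_new succeeds because the schema message itself is not
            // compressed; the error surfaces when the first compressed
            // record batch is decoded below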
            let mut reader = StreamReader::try_new(file, None).unwrap();
            let err = reader.next().unwrap().unwrap_err();
            let expected_error = format!(
                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
                compression_name
            );
            assert_eq!(err.to_string(), expected_error);
        });
    }

    #[test]
    #[cfg(feature = "ipc_compression")]
    fn read_generated_files_200() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "2.0.0-compression";
        // the test is repetitive, thus we can read all supported files at once
        let paths = vec!["generated_lz4", "generated_zstd"];
        paths.iter().for_each(|path| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = FileReader::try_new(file, None).unwrap();

            // read expected JSON output
            let arrow_json = read_gzip_json(version, path);
            assert!(arrow_json.equals_reader(&mut reader).unwrap());
        });
    }

    #[test]
    #[cfg(not(feature = "ipc_compression"))]
    fn read_generated_files_200_negative() {
        let testdata = crate::util::test_util::arrow_test_data();
        let version = "2.0.0-compression";
        // the test is repetitive, thus we can read all supported files at once
        let cases = vec![("generated_lz4", "LZ4_FRAME"), ("generated_zstd", "ZSTD")];
        cases.iter().for_each(|(path, compression_name)| {
            let file = File::open(format!(
                "{}/arrow-ipc-stream/integration/{}/{}.arrow_file",
                testdata, version, path
            ))
            .unwrap();

            let mut reader = FileReader::try_new(file, None).unwrap();

            let err = reader.next().unwrap().unwrap_err();
            let expected_error = format!(
                "Invalid argument error: compression type {} not supported because arrow was not compiled with the ipc_compression feature",
                compression_name
            );
            assert_eq!(err.to_string(), expected_error);
        });
    }

    fn create_test_projection_schema() -> Schema {
        // define field types
@@ -1816,22 +1488,6 @@ mod tests {
        check_union_with_builder(UnionBuilder::new_sparse());
    }

    /// Read gzipped JSON file
    fn read_gzip_json(version: &str, path: &str) -> ArrowJson {
        let testdata = crate::util::test_util::arrow_test_data();
        let file = File::open(format!(
            "{}/arrow-ipc-stream/integration/{}/{}.json.gz",
            testdata, version, path
        ))
        .unwrap();
        let mut gz = GzDecoder::new(&file);
        let mut s = String::new();
        gz.read_to_string(&mut s).unwrap();
        // convert to Arrow JSON
        let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap();
        arrow_json
    }

    #[test]
    fn test_roundtrip_stream_nested_dict() {
        let xs = vec!["AA", "BB", "AA", "CC", "BB"];