Skip to content

Commit b724849

Browse files
authored
Improve C Data Interface and Add Integration Testing Entrypoints (#5080)
* Add C Data Interface integration testing entrypoints
* Allow importing FFI_ArrowArray with existing datatype
* Clippy
* Use ptr::write
* Fix null_count for Null type
* Use new from_raw() APIs
* Address some review comments.
* Add unsafe markers
* Try to fix CI
* Revamp ArrowFile
1 parent 4d141a3 commit b724849

File tree

12 files changed

+363
-137
lines changed

12 files changed

+363
-137
lines changed

arrow-data/src/ffi.rs

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,12 @@ impl FFI_ArrowArray {
168168
.collect::<Box<_>>();
169169
let n_children = children.len() as i64;
170170

171+
// As in the IPC format, emit null_count = length for Null type
172+
let null_count = match data.data_type() {
173+
DataType::Null => data.len(),
174+
_ => data.null_count(),
175+
};
176+
171177
// create the private data owning everything.
172178
// any other data must be added here, e.g. via a struct, to track lifetime.
173179
let mut private_data = Box::new(ArrayPrivateData {
@@ -179,7 +185,7 @@ impl FFI_ArrowArray {
179185

180186
Self {
181187
length: data.len() as i64,
182-
null_count: data.null_count() as i64,
188+
null_count: null_count as i64,
183189
offset: data.offset() as i64,
184190
n_buffers,
185191
n_children,

arrow-integration-testing/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,14 @@ edition = { workspace = true }
2727
publish = false
2828
rust-version = { workspace = true }
2929

30+
[lib]
31+
crate-type = ["lib", "cdylib"]
32+
3033
[features]
3134
logging = ["tracing-subscriber"]
3235

3336
[dependencies]
34-
arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json"] }
37+
arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json", "ffi"] }
3538
arrow-flight = { path = "../arrow-flight", default-features = false }
3639
arrow-buffer = { path = "../arrow-buffer", default-features = false }
3740
arrow-integration-test = { path = "../arrow-integration-test", default-features = false }

arrow-integration-testing/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ ln -s <path_to_arrow_rs> arrow/rust
4848

4949
```shell
5050
cd arrow
51-
pip install -e dev/archery[docker]
51+
pip install -e dev/archery[integration]
5252
```
5353

5454
### Build the C++ binaries:

arrow-integration-testing/src/bin/arrow-json-integration-test.rs

Lines changed: 5 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::datatypes::{DataType, Field};
19-
use arrow::datatypes::{Fields, Schema};
2018
use arrow::error::{ArrowError, Result};
2119
use arrow::ipc::reader::FileReader;
2220
use arrow::ipc::writer::FileWriter;
2321
use arrow_integration_test::*;
24-
use arrow_integration_testing::read_json_file;
22+
use arrow_integration_testing::{canonicalize_schema, open_json_file};
2523
use clap::Parser;
2624
use std::fs::File;
27-
use std::sync::Arc;
2825

2926
#[derive(clap::ValueEnum, Debug, Clone)]
3027
#[clap(rename_all = "SCREAMING_SNAKE_CASE")]
@@ -66,12 +63,12 @@ fn json_to_arrow(json_name: &str, arrow_name: &str, verbose: bool) -> Result<()>
6663
eprintln!("Converting {json_name} to {arrow_name}");
6764
}
6865

69-
let json_file = read_json_file(json_name)?;
66+
let json_file = open_json_file(json_name)?;
7067

7168
let arrow_file = File::create(arrow_name)?;
7269
let mut writer = FileWriter::try_new(arrow_file, &json_file.schema)?;
7370

74-
for b in json_file.batches {
71+
for b in json_file.read_batches()? {
7572
writer.write(&b)?;
7673
}
7774

@@ -113,49 +110,13 @@ fn arrow_to_json(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()>
113110
Ok(())
114111
}
115112

116-
fn canonicalize_schema(schema: &Schema) -> Schema {
117-
let fields = schema
118-
.fields()
119-
.iter()
120-
.map(|field| match field.data_type() {
121-
DataType::Map(child_field, sorted) => match child_field.data_type() {
122-
DataType::Struct(fields) if fields.len() == 2 => {
123-
let first_field = fields.get(0).unwrap();
124-
let key_field =
125-
Arc::new(Field::new("key", first_field.data_type().clone(), false));
126-
let second_field = fields.get(1).unwrap();
127-
let value_field = Arc::new(Field::new(
128-
"value",
129-
second_field.data_type().clone(),
130-
second_field.is_nullable(),
131-
));
132-
133-
let fields = Fields::from([key_field, value_field]);
134-
let struct_type = DataType::Struct(fields);
135-
let child_field = Field::new("entries", struct_type, false);
136-
137-
Arc::new(Field::new(
138-
field.name().as_str(),
139-
DataType::Map(Arc::new(child_field), *sorted),
140-
field.is_nullable(),
141-
))
142-
}
143-
_ => panic!("The child field of Map type should be Struct type with 2 fields."),
144-
},
145-
_ => field.clone(),
146-
})
147-
.collect::<Fields>();
148-
149-
Schema::new(fields).with_metadata(schema.metadata().clone())
150-
}
151-
152113
fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
153114
if verbose {
154115
eprintln!("Validating {arrow_name} and {json_name}");
155116
}
156117

157118
// open JSON file
158-
let json_file = read_json_file(json_name)?;
119+
let json_file = open_json_file(json_name)?;
159120

160121
// open Arrow file
161122
let arrow_file = File::open(arrow_name)?;
@@ -170,7 +131,7 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> {
170131
)));
171132
}
172133

173-
let json_batches = &json_file.batches;
134+
let json_batches = json_file.read_batches()?;
174135

175136
// compare number of batches
176137
assert!(

arrow-integration-testing/src/flight_client_scenarios/integration_test.rs

Lines changed: 5 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::{read_json_file, ArrowFile};
18+
use crate::open_json_file;
1919
use std::collections::HashMap;
2020

2121
use arrow::{
@@ -45,23 +45,16 @@ pub async fn run_scenario(host: &str, port: u16, path: &str) -> Result {
4545

4646
let client = FlightServiceClient::connect(url).await?;
4747

48-
let ArrowFile {
49-
schema, batches, ..
50-
} = read_json_file(path)?;
48+
let json_file = open_json_file(path)?;
5149

52-
let schema = Arc::new(schema);
50+
let batches = json_file.read_batches()?;
51+
let schema = Arc::new(json_file.schema);
5352

5453
let mut descriptor = FlightDescriptor::default();
5554
descriptor.set_type(DescriptorType::Path);
5655
descriptor.path = vec![path.to_string()];
5756

58-
upload_data(
59-
client.clone(),
60-
schema.clone(),
61-
descriptor.clone(),
62-
batches.clone(),
63-
)
64-
.await?;
57+
upload_data(client.clone(), schema, descriptor.clone(), batches.clone()).await?;
6558
verify_data(client, descriptor, &batches).await?;
6659

6760
Ok(())

0 commit comments

Comments (0)