Skip to content

Commit 538e0d1

Browse files
authored
FFI initial implementation (#12920)
* Initial commit of FFI table provider code * Add table type * Make struct pub * Implementing supports_filters_pushdown * Move plan properties over to its own file * Adding release function * Adding release functions to additional structs * Resolve memory leaks * Rename ForeignExecutionPlan for consistency * Resolving memory leak issues * Remove debug statements. Create runtime for block_on operations * Switching over to stable abi and async-ffi * Make consistent the use of Foreign and FFI on struct names * Apply prettier * Format for linter * Add doc-comment * Add option to specify table provider does not support pushdown filters to avoid extra work for some providers * Remove setting default features in cargo file * Tokio only needed for unit tests * Provide log errors rather than failing silently on schema requests * Set default features for datafusion to false in ffi crate * Using TryFrom or From instead of implementing new when there is only one parameter * Move arrow wrappers into their own file * Add documentation * Small adjustment to documentation * Add license text * Fix unnecessary qualification * taplo format
1 parent 8c6bb39 commit 538e0d1

11 files changed

+1784
-0
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ members = [
2626
"datafusion/expr",
2727
"datafusion/expr-common",
2828
"datafusion/execution",
29+
"datafusion/ffi",
2930
"datafusion/functions",
3031
"datafusion/functions-aggregate",
3132
"datafusion/functions-aggregate-common",
@@ -99,6 +100,7 @@ datafusion-common-runtime = { path = "datafusion/common-runtime", version = "42.
99100
datafusion-execution = { path = "datafusion/execution", version = "42.1.0" }
100101
datafusion-expr = { path = "datafusion/expr", version = "42.1.0" }
101102
datafusion-expr-common = { path = "datafusion/expr-common", version = "42.1.0" }
103+
datafusion-ffi = { path = "datafusion/ffi", version = "42.1.0" }
102104
datafusion-functions = { path = "datafusion/functions", version = "42.1.0" }
103105
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "42.1.0" }
104106
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "42.1.0" }

datafusion/ffi/Cargo.toml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-ffi"
20+
description = "Foreign Function Interface implementation for DataFusion"
21+
readme = "README.md"
22+
version = { workspace = true }
23+
edition = { workspace = true }
24+
homepage = { workspace = true }
25+
repository = { workspace = true }
26+
license = { workspace = true }
27+
authors = { workspace = true }
28+
# Specify MSRV here as `cargo msrv` doesn't support workspace version
29+
rust-version = "1.76"
30+
31+
[lints]
32+
workspace = true
33+
34+
[lib]
35+
name = "datafusion_ffi"
36+
path = "src/lib.rs"
37+
38+
[dependencies]
39+
abi_stable = "0.11.3"
40+
arrow = { workspace = true, features = ["ffi"] }
41+
async-ffi = { version = "0.5.0", features = ["abi_stable"] }
42+
async-trait = { workspace = true }
43+
datafusion = { workspace = true, default-features = false }
44+
datafusion-proto = { workspace = true }
45+
doc-comment = { workspace = true }
46+
futures = { workspace = true }
47+
log = { workspace = true }
48+
prost = { workspace = true }
49+
50+
[dev-dependencies]
51+
tokio = { workspace = true }

datafusion/ffi/README.md

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# `datafusion-ffi`: Apache DataFusion Foreign Function Interface
21+
22+
This crate contains code to allow interoperability of Apache [DataFusion]
23+
with functions from other languages using a stable interface.
24+
25+
See [API Docs] for details and examples.
26+
27+
We expect this crate may be used by both sides of the FFI. This allows users
28+
to create modules that can interoperate without the necessity of using the same
29+
version of DataFusion. The driving use case has been the `datafusion-python`
30+
repository, but many other use cases may exist. We envision at least two
31+
use cases.
32+
33+
1. `datafusion-python` which will use the FFI to provide external services such
34+
as a `TableProvider` without needing to re-export the entire `datafusion-python`
35+
code base. With `datafusion-ffi` these packages do not need `datafusion-python`
36+
as a dependency at all.
37+
2. Users may want to create a modular interface that allows runtime loading of
38+
libraries.
39+
40+
## Struct Layout
41+
42+
In this crate we have a variety of structs which closely mimic the behavior of
43+
their internal counterparts. In the following example, we will refer to the
44+
`TableProvider`, but the same pattern exists for other structs.
45+
46+
Each of the exposed structs in this crate is provided with a variant prefixed
47+
with `Foreign`. This variant is designed to be used by the consumer of the
48+
foreign code. The `Foreign` structs should _never_ access the `private_data`
49+
fields. Instead they should only access the data returned through the function
50+
calls defined on the `FFI_` structs. The second purpose of the `Foreign`
51+
structs is to contain additional data that may be needed by the traits that
52+
are implemented on them. Some of these traits require borrowing data which
53+
can be far more convenient to be locally stored.
54+
55+
For example, we have a struct `FFI_TableProvider` to give access to the
56+
`TableProvider` functions like `table_type()` and `scan()`. If we write a
57+
library that wishes to expose its `TableProvider`, then we can access the
58+
private data that contains the Arc reference to the `TableProvider` via
59+
`FFI_TableProvider`. This data is local to the library.
60+
61+
If we have a program that accesses a `TableProvider` via FFI, then it
62+
will use `ForeignTableProvider`. When using `ForeignTableProvider` we **must**
63+
not attempt to access the `private_data` field in `FFI_TableProvider`. If you
64+
are testing locally, you may be able to successfully access this field, but
65+
it will only work if you are building against the exact same version of
66+
`DataFusion` for both libraries **and** the same compiler. It will not work
67+
in general.
68+
69+
It is worth noting that which library is the `local` and which is `foreign`
70+
depends on which interface we are considering. For example, suppose we have a
71+
Python library called `my_provider` that exposes a `TableProvider` called
72+
`MyProvider` via `FFI_TableProvider`. Within the library `my_provider` we can
73+
access the `private_data` via `FFI_TableProvider`. We connect this to
74+
`datafusion-python`, where we access it as a `ForeignTableProvider`. Now when
75+
we call `scan()` on this interface, we have to pass it a `FFI_SessionConfig`.
76+
The `SessionConfig` is local to `datafusion-python` and **not** `my_provider`.
77+
It is important to be careful when expanding these functions to be certain which
78+
side of the interface each object refers to.
79+
80+
[datafusion]: https://datafusion.apache.org
81+
[api docs]: http://docs.rs/datafusion-ffi/latest

datafusion/ffi/src/arrow_wrappers.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use abi_stable::StableAbi;
21+
use arrow::{
22+
datatypes::{Schema, SchemaRef},
23+
ffi::{FFI_ArrowArray, FFI_ArrowSchema},
24+
};
25+
use log::error;
26+
27+
/// Newtype wrapper around `FFI_ArrowSchema`. It exists only to indicate to
28+
/// the StableAbi derive macros that the wrapped struct is FFI safe.
29+
#[repr(C)]
30+
#[derive(Debug, StableAbi)]
31+
pub struct WrappedSchema(#[sabi(unsafe_opaque_field)] pub FFI_ArrowSchema);
32+
33+
impl From<SchemaRef> for WrappedSchema {
34+
fn from(value: SchemaRef) -> Self {
35+
let ffi_schema = match FFI_ArrowSchema::try_from(value.as_ref()) {
36+
Ok(s) => s,
37+
Err(e) => {
38+
error!("Unable to convert DataFusion Schema to FFI_ArrowSchema in FFI_PlanProperties. {}", e);
39+
FFI_ArrowSchema::empty()
40+
}
41+
};
42+
43+
WrappedSchema(ffi_schema)
44+
}
45+
}
46+
47+
impl From<WrappedSchema> for SchemaRef {
48+
fn from(value: WrappedSchema) -> Self {
49+
let schema = match Schema::try_from(&value.0) {
50+
Ok(s) => s,
51+
Err(e) => {
52+
error!("Unable to convert from FFI_ArrowSchema to DataFusion Schema in FFI_PlanProperties. {}", e);
53+
Schema::empty()
54+
}
55+
};
56+
Arc::new(schema)
57+
}
58+
}
59+
60+
/// Wrapper struct pairing an `FFI_ArrowArray` with its schema. The array is
61+
/// marked opaque so StableAbi accepts it as FFI safe; the schema is carried
62+
/// along for convenience, as it is needed to create a record batch from the array.
63+
#[repr(C)]
64+
#[derive(Debug, StableAbi)]
65+
pub struct WrappedArray {
66+
#[sabi(unsafe_opaque_field)]
67+
pub array: FFI_ArrowArray,
68+
69+
pub schema: WrappedSchema,
70+
}

0 commit comments

Comments
 (0)