Skip to content

Commit 9595b8d

Browse files
Brent Gardnerxudong963alamb
authored
Add serde for plans with tables from TableProviderFactorys (#3907)
* Can compile and run test Failing on scheduler due to no factories Tests pass Back to "no object store available for delta-rs://home-bgardner-workspace" Switch back to git refs CI fixes Add roundtrip test Passing deltalake test Passing serde test Remove unrelated refactor Formatting Fix typo that was hard to debug CI fixes delta & ballista tests pass * Take Andy's advice and turn it async * Fix CI * No suitable object store on executor * Fix test * Fix test * Bump CI * Update datafusion/core/src/datasource/datasource.rs Co-authored-by: xudong.w <[email protected]> * Update datafusion/proto/src/bytes/mod.rs Co-authored-by: Andrew Lamb <[email protected]> * Update datafusion/proto/src/bytes/mod.rs Co-authored-by: Andrew Lamb <[email protected]> Co-authored-by: xudong.w <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent e1f866e commit 9595b8d

File tree

11 files changed

+372
-89
lines changed

11 files changed

+372
-89
lines changed

datafusion/core/src/datasource/datasource.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,6 @@ pub trait TableProvider: Sync + Send {
8585
/// from a directory of files only when that name is referenced.
8686
#[async_trait]
8787
pub trait TableProviderFactory: Sync + Send {
88-
/// Create a TableProvider given name and url
89-
async fn create(&self, name: &str, url: &str) -> Result<Arc<dyn TableProvider>>;
88+
/// Create a TableProvider with the given url
89+
async fn create(&self, url: &str) -> Result<Arc<dyn TableProvider>>;
9090
}

datafusion/core/src/execution/context.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -418,19 +418,18 @@ impl SessionContext {
418418
cmd: &CreateExternalTable,
419419
) -> Result<Arc<DataFrame>> {
420420
let state = self.state.read().clone();
421+
let file_type = cmd.file_type.to_lowercase();
421422
let factory = &state
422423
.runtime_env
423424
.table_factories
424-
.get(&cmd.file_type)
425+
.get(file_type.as_str())
425426
.ok_or_else(|| {
426427
DataFusionError::Execution(format!(
427428
"Unable to find factory for {}",
428429
cmd.file_type
429430
))
430431
})?;
431-
let table = (*factory)
432-
.create(cmd.name.as_str(), cmd.location.as_str())
433-
.await?;
432+
let table = (*factory).create(cmd.location.as_str()).await?;
434433
self.register_table(cmd.name.as_str(), table)?;
435434
let plan = LogicalPlanBuilder::empty(false).build()?;
436435
Ok(Arc::new(DataFrame::new(self.state.clone(), &plan)))

datafusion/core/src/test_util.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,19 @@
1717

1818
//! Utility functions to make testing DataFusion based crates easier
1919
20+
use std::any::Any;
2021
use std::collections::BTreeMap;
2122
use std::{env, error::Error, path::PathBuf, sync::Arc};
2223

23-
use crate::datasource::{empty::EmptyTable, provider_as_source};
24+
use crate::datasource::datasource::TableProviderFactory;
25+
use crate::datasource::{empty::EmptyTable, provider_as_source, TableProvider};
26+
use crate::execution::context::SessionState;
2427
use crate::logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE};
28+
use crate::physical_plan::ExecutionPlan;
2529
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
30+
use async_trait::async_trait;
2631
use datafusion_common::DataFusionError;
32+
use datafusion_expr::{Expr, TableType};
2733

2834
/// Compares formatted output of a record batch with an expected
2935
/// vector of strings, with the result of pretty formatting record
@@ -317,6 +323,58 @@ pub fn aggr_test_schema_with_missing_col() -> SchemaRef {
317323
Arc::new(schema)
318324
}
319325

326+
/// TableProviderFactory for tests; creates a TestTableProvider for the given url
327+
pub struct TestTableFactory {}
328+
329+
#[async_trait]
330+
impl TableProviderFactory for TestTableFactory {
331+
async fn create(
332+
&self,
333+
url: &str,
334+
) -> datafusion_common::Result<Arc<dyn TableProvider>> {
335+
Ok(Arc::new(TestTableProvider {
336+
url: url.to_string(),
337+
}))
338+
}
339+
}
340+
341+
/// TableProvider for testing purposes
342+
pub struct TestTableProvider {
343+
/// URL of table files or folder
344+
pub url: String,
345+
}
346+
347+
impl TestTableProvider {}
348+
349+
#[async_trait]
350+
impl TableProvider for TestTableProvider {
351+
fn as_any(&self) -> &dyn Any {
352+
self
353+
}
354+
355+
fn schema(&self) -> SchemaRef {
356+
let schema = Schema::new(vec![
357+
Field::new("a", DataType::Int64, true),
358+
Field::new("b", DataType::Decimal128(15, 2), true),
359+
]);
360+
Arc::new(schema)
361+
}
362+
363+
fn table_type(&self) -> TableType {
364+
unimplemented!("TestTableProvider is a stub for testing.")
365+
}
366+
367+
async fn scan(
368+
&self,
369+
_ctx: &SessionState,
370+
_projection: &Option<Vec<usize>>,
371+
_filters: &[Expr],
372+
_limit: Option<usize>,
373+
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
374+
unimplemented!("TestTableProvider is a stub for testing.")
375+
}
376+
}
377+
320378
#[cfg(test)]
321379
mod tests {
322380
use super::*;

datafusion/core/tests/sql/create_drop.rs

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,12 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use async_trait::async_trait;
19-
use std::any::Any;
2018
use std::collections::HashMap;
2119
use std::io::Write;
2220

2321
use datafusion::datasource::datasource::TableProviderFactory;
24-
use datafusion::execution::context::SessionState;
2522
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
26-
use datafusion_expr::TableType;
23+
use datafusion::test_util::TestTableFactory;
2724
use tempfile::TempDir;
2825

2926
use super::*;
@@ -369,49 +366,11 @@ async fn create_pipe_delimited_csv_table() -> Result<()> {
369366
Ok(())
370367
}
371368

372-
struct TestTableProvider {}
373-
374-
impl TestTableProvider {}
375-
376-
#[async_trait]
377-
impl TableProvider for TestTableProvider {
378-
fn as_any(&self) -> &dyn Any {
379-
unimplemented!("TestTableProvider is a stub for testing.")
380-
}
381-
382-
fn schema(&self) -> SchemaRef {
383-
unimplemented!("TestTableProvider is a stub for testing.")
384-
}
385-
386-
fn table_type(&self) -> TableType {
387-
unimplemented!("TestTableProvider is a stub for testing.")
388-
}
389-
390-
async fn scan(
391-
&self,
392-
_ctx: &SessionState,
393-
_projection: &Option<Vec<usize>>,
394-
_filters: &[Expr],
395-
_limit: Option<usize>,
396-
) -> Result<Arc<dyn ExecutionPlan>> {
397-
unimplemented!("TestTableProvider is a stub for testing.")
398-
}
399-
}
400-
401-
struct TestTableFactory {}
402-
403-
#[async_trait]
404-
impl TableProviderFactory for TestTableFactory {
405-
async fn create(&self, _name: &str, _url: &str) -> Result<Arc<dyn TableProvider>> {
406-
Ok(Arc::new(TestTableProvider {}))
407-
}
408-
}
409-
410369
#[tokio::test]
411370
async fn create_custom_table() -> Result<()> {
412371
let mut table_factories: HashMap<String, Arc<dyn TableProviderFactory>> =
413372
HashMap::new();
414-
table_factories.insert("DELTATABLE".to_string(), Arc::new(TestTableFactory {}));
373+
table_factories.insert("deltatable".to_string(), Arc::new(TestTableFactory {}));
415374
let cfg = RuntimeConfig::new().with_table_factories(table_factories);
416375
let env = RuntimeEnv::new(cfg).unwrap();
417376
let ses = SessionConfig::new();

datafusion/proto/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ json = ["pbjson", "pbjson-build", "serde", "serde_json"]
4141

4242
[dependencies]
4343
arrow = "25.0.0"
44+
async-trait = "0.1.41"
4445
datafusion = { path = "../core", version = "13.0.0" }
4546
datafusion-common = { path = "../common", version = "13.0.0" }
4647
datafusion-expr = { path = "../expr", version = "13.0.0" }

datafusion/proto/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ async fn main() -> Result<()> {
6363
?;
6464
let plan = ctx.table("t1")?.to_logical_plan()?;
6565
let bytes = logical_plan_to_bytes(&plan)?;
66-
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?;
66+
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx).await?;
6767
assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
6868
Ok(())
6969
}

datafusion/proto/examples/plan_serde.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ async fn main() -> Result<()> {
2626
.await?;
2727
let plan = ctx.table("t1")?.to_logical_plan()?;
2828
let bytes = logical_plan_to_bytes(&plan)?;
29-
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?;
29+
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx).await?;
3030
assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
3131
Ok(())
3232
}

datafusion/proto/proto/datafusion.proto

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ message LogicalPlanNode {
7070
CreateViewNode create_view = 22;
7171
DistinctNode distinct = 23;
7272
ViewTableScanNode view_scan = 24;
73+
CustomTableScanNode custom_scan = 25;
7374
}
7475
}
7576

@@ -118,6 +119,15 @@ message ViewTableScanNode {
118119
string definition = 5;
119120
}
120121

122+
// Logical Plan to Scan a CustomTableProvider registered at runtime
123+
message CustomTableScanNode {
124+
string table_name = 1;
125+
ProjectionColumns projection = 2;
126+
datafusion.Schema schema = 3;
127+
repeated datafusion.LogicalExprNode filters = 4;
128+
bytes custom_table_data = 5;
129+
}
130+
121131
message ProjectionNode {
122132
LogicalPlanNode input = 1;
123133
repeated datafusion.LogicalExprNode expr = 2;

datafusion/proto/src/bytes/mod.rs

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@
1818
//! Serialization / Deserialization to Bytes
1919
use crate::logical_plan::{AsLogicalPlan, LogicalExtensionCodec};
2020
use crate::{from_proto::parse_expr, protobuf};
21+
use arrow::datatypes::SchemaRef;
22+
use async_trait::async_trait;
23+
use datafusion::datasource::TableProvider;
2124
use datafusion_common::{DataFusionError, Result};
2225
use datafusion_expr::{Expr, Extension, LogicalPlan};
2326
use prost::{
2427
bytes::{Bytes, BytesMut},
2528
Message,
2629
};
30+
use std::sync::Arc;
2731

2832
// Reexport Bytes which appears in the API
2933
use datafusion::execution::registry::FunctionRegistry;
@@ -132,37 +136,41 @@ pub fn logical_plan_to_bytes_with_extension_codec(
132136

133137
/// Deserialize a LogicalPlan from json
134138
#[cfg(feature = "json")]
135-
pub fn logical_plan_from_json(json: &str, ctx: &SessionContext) -> Result<LogicalPlan> {
139+
pub async fn logical_plan_from_json(
140+
json: &str,
141+
ctx: &SessionContext,
142+
) -> Result<LogicalPlan> {
136143
let back: protobuf::LogicalPlanNode = serde_json::from_str(json)
137144
.map_err(|e| DataFusionError::Plan(format!("Error serializing plan: {}", e)))?;
138145
let extension_codec = DefaultExtensionCodec {};
139-
back.try_into_logical_plan(ctx, &extension_codec)
146+
back.try_into_logical_plan(ctx, &extension_codec).await
140147
}
141148

142149
/// Deserialize a LogicalPlan from bytes
143-
pub fn logical_plan_from_bytes(
150+
pub async fn logical_plan_from_bytes(
144151
bytes: &[u8],
145152
ctx: &SessionContext,
146153
) -> Result<LogicalPlan> {
147154
let extension_codec = DefaultExtensionCodec {};
148-
logical_plan_from_bytes_with_extension_codec(bytes, ctx, &extension_codec)
155+
logical_plan_from_bytes_with_extension_codec(bytes, ctx, &extension_codec).await
149156
}
150157

151158
/// Deserialize a LogicalPlan from bytes, using the provided extension codec
152-
pub fn logical_plan_from_bytes_with_extension_codec(
159+
pub async fn logical_plan_from_bytes_with_extension_codec(
153160
bytes: &[u8],
154161
ctx: &SessionContext,
155162
extension_codec: &dyn LogicalExtensionCodec,
156163
) -> Result<LogicalPlan> {
157164
let protobuf = protobuf::LogicalPlanNode::decode(bytes).map_err(|e| {
158165
DataFusionError::Plan(format!("Error decoding expr as protobuf: {}", e))
159166
})?;
160-
protobuf.try_into_logical_plan(ctx, extension_codec)
167+
protobuf.try_into_logical_plan(ctx, extension_codec).await
161168
}
162169

163170
#[derive(Debug)]
164171
struct DefaultExtensionCodec {}
165172

173+
#[async_trait]
166174
impl LogicalExtensionCodec for DefaultExtensionCodec {
167175
fn try_decode(
168176
&self,
@@ -180,6 +188,27 @@ impl LogicalExtensionCodec for DefaultExtensionCodec {
180188
"No extension codec provided".to_string(),
181189
))
182190
}
191+
192+
async fn try_decode_table_provider(
193+
&self,
194+
_buf: &[u8],
195+
_schema: SchemaRef,
196+
_ctx: &SessionContext,
197+
) -> std::result::Result<Arc<dyn TableProvider>, DataFusionError> {
198+
Err(DataFusionError::NotImplemented(
199+
"No codec provided to for TableProviders".to_string(),
200+
))
201+
}
202+
203+
fn try_encode_table_provider(
204+
&self,
205+
_node: Arc<dyn TableProvider>,
206+
_buf: &mut Vec<u8>,
207+
) -> std::result::Result<(), DataFusionError> {
208+
Err(DataFusionError::NotImplemented(
209+
"No codec provided to for TableProviders".to_string(),
210+
))
211+
}
183212
}
184213

185214
#[cfg(test)]
@@ -214,12 +243,12 @@ mod test {
214243
assert_eq!(actual, expected);
215244
}
216245

217-
#[test]
246+
#[tokio::test]
218247
#[cfg(feature = "json")]
219-
fn json_to_plan() {
248+
async fn json_to_plan() {
220249
let input = r#"{"emptyRelation":{}}"#.to_string();
221250
let ctx = SessionContext::new();
222-
let actual = logical_plan_from_json(&input, &ctx).unwrap();
251+
let actual = logical_plan_from_json(&input, &ctx).await.unwrap();
223252
let result = matches!(actual, LogicalPlan::EmptyRelation(_));
224253
assert!(result, "Should parse empty relation");
225254
}

0 commit comments

Comments
 (0)