Skip to content

Patched DataFusion version 45.0.0 #54

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: base-df-upgrade-ver45
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions .github/actions/setup-rust-runtime/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ description: 'Setup Rust Runtime Environment'
runs:
using: "composite"
steps:
- name: Run sccache-cache
uses: mozilla-actions/[email protected]
# https://github.com/apache/datafusion/issues/15535
# disabled because neither pinning by version tag nor by git hash complies with the Apache GitHub Actions policy
#- name: Run sccache-cache
# uses: mozilla-actions/sccache-action@65101d47ea8028ed0c98a1cdea8dd9182e9b5133 # v0.0.8
- name: Configure runtime env
shell: bash
# do not produce debug symbols to keep memory usage down
Expand All @@ -30,9 +32,11 @@ runs:
#
# Set debuginfo=line-tables-only as debuginfo=0 causes immensely slow build
# See for more details: https://github.com/rust-lang/rust/issues/119560
#
# re-add the following to the run below once sccache-cache is re-enabled
# echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
# echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
run: |
echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV

2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ jobs:
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Build with wasm-pack
working-directory: ./datafusion/wasmtest
run: wasm-pack build --dev
run: RUSTFLAGS='--cfg getrandom_backend="wasm_js"' wasm-pack build --dev

# verify that the benchmark queries return the correct results
verify-benchmark-results:
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ homepage = "https://datafusion.apache.org"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/datafusion"
rust-version = "1.81.0"
rust-version = "1.82.0"
version = "45.0.0"

[workspace.dependencies]
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ config_namespace! {
///
/// This is used to work around bugs in the planner that are now caught by
/// the new schema verification step.
pub skip_physical_aggregate_schema_check: bool, default = false
pub skip_physical_aggregate_schema_check: bool, default = true

/// Specifies the reserved memory for each spillable sort operation to
/// facilitate an in-memory merge.
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub enum DataFusionError {
ParquetError(ParquetError),
/// Error when reading Avro data.
#[cfg(feature = "avro")]
AvroError(AvroError),
AvroError(Box<AvroError>),
/// Error when reading / writing to / from an object_store (e.g. S3 or LocalFile)
#[cfg(feature = "object_store")]
ObjectStore(object_store::Error),
Expand Down Expand Up @@ -267,7 +267,7 @@ impl From<ParquetError> for DataFusionError {
#[cfg(feature = "avro")]
impl From<AvroError> for DataFusionError {
fn from(e: AvroError) -> Self {
DataFusionError::AvroError(e)
DataFusionError::AvroError(Box::new(e))
}
}

Expand Down
7 changes: 3 additions & 4 deletions datafusion/common/src/table_reference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,17 +193,16 @@ impl TableReference {
match self {
TableReference::Bare { table } => **table == *other.table(),
TableReference::Partial { schema, table } => {
**table == *other.table()
&& other.schema().map_or(true, |s| *s == **schema)
**table == *other.table() && other.schema().is_none_or(|s| *s == **schema)
}
TableReference::Full {
catalog,
schema,
table,
} => {
**table == *other.table()
&& other.schema().map_or(true, |s| *s == **schema)
&& other.catalog().map_or(true, |c| *c == **catalog)
&& other.schema().is_none_or(|s| *s == **schema)
&& other.catalog().is_none_or(|c| *c == **catalog)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ tempfile = { workspace = true }
tokio = { workspace = true }
tokio-util = { version = "0.7.4", features = ["io"], optional = true }
url = { workspace = true }
uuid = { version = "1.7", features = ["v4"] }
uuid = { version = "1.7", features = ["v4", "js"] }
xz2 = { version = "0.1", optional = true, features = ["static"] }
zstd = { version = "0.13", optional = true, default-features = false }

Expand Down
6 changes: 2 additions & 4 deletions datafusion/core/src/datasource/avro_to_arrow/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::arrow::datatypes::{DataType, IntervalUnit, Schema, TimeUnit, UnionMode};
use crate::error::{DataFusionError, Result};
use crate::error::Result;
use apache_avro::schema::{
Alias, DecimalSchema, EnumSchema, FixedSchema, Name, RecordSchema,
};
Expand Down Expand Up @@ -107,9 +107,7 @@ fn schema_to_field_with_props(
.data_type()
.clone()
} else {
return Err(DataFusionError::AvroError(
apache_avro::Error::GetUnionDuplicate,
));
return Err(apache_avro::Error::GetUnionDuplicate.into());
}
} else {
let fields = sub_schemas
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/datasource/file_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
Schema::new_with_metadata(transformed_fields, schema.metadata.clone())
}

#[cfg(not(target_arch = "wasm32"))]
/// Coerces the file schema if the table schema uses a view type.
pub(crate) fn coerce_file_schema_to_view_type(
table_schema: &Schema,
Expand Down Expand Up @@ -486,6 +487,7 @@ pub fn transform_binary_to_string(schema: &Schema) -> Schema {
Schema::new_with_metadata(transformed_fields, schema.metadata.clone())
}

#[cfg(not(target_arch = "wasm32"))]
/// If the table schema uses a string type, coerce the file schema to use a string type.
///
/// See [parquet::ParquetFormat::binary_as_string] for details
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/listing/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2179,7 +2179,7 @@ mod tests {

// create table
let tmp_dir = TempDir::new()?;
let tmp_path = tmp_dir.into_path();
let tmp_path = tmp_dir.keep();
let str_path = tmp_path.to_str().expect("Temp path should convert to &str");
session_ctx
.sql(&format!(
Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/datasource/listing/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,10 @@ impl std::fmt::Display for ListingTableUrl {
}
}

#[cfg(not(target_arch = "wasm32"))]
const GLOB_START_CHARS: [char; 3] = ['?', '*', '['];

#[cfg(not(target_arch = "wasm32"))]
/// Splits `path` at the first path segment containing a glob expression, returning
/// `None` if no glob expression found.
///
Expand Down
3 changes: 3 additions & 0 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,9 @@ impl DefaultPhysicalPlanner {
differences.push(format!("field nullability at index {} [{}]: (physical) {} vs (logical) {}", i, physical_field.name(), physical_field.is_nullable(), logical_field.is_nullable()));
}
}

log::warn!("Physical input schema should be the same as the one converted from logical input schema, but did not match for logical plan:\n{}", input.display_indent());

return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences
.iter()
.map(|s| format!("\n\t- {}", s))
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/execution/logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use std::fmt::Debug;
use std::ops::Deref;
use std::sync::Arc;

///! Logical plans need to provide stable semantics, as downstream projects
///! create them and depend on them. Test executable semantics of logical plans.
// Logical plans need to provide stable semantics, as downstream projects
// create them and depend on them. Test executable semantics of logical plans.

#[tokio::test]
async fn count_only_nulls() -> Result<()> {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/memory_limit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ async fn oom_recursive_cte() {
#[tokio::test]
async fn oom_parquet_sink() {
let dir = tempfile::tempdir().unwrap();
let path = dir.into_path().join("test.parquet");
let path = dir.keep().join("test.parquet");
let _ = File::create(path.clone()).await.unwrap();

TestCase::new()
Expand All @@ -371,7 +371,7 @@ async fn oom_parquet_sink() {
#[tokio::test]
async fn oom_with_tracked_consumer_pool() {
let dir = tempfile::tempdir().unwrap();
let path = dir.into_path().join("test.parquet");
let path = dir.keep().join("test.parquet");
let _ = File::create(path.clone()).await.unwrap();

TestCase::new()
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/parquet/file_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,15 @@ async fn list_files_with_session_level_cache() {

let temp_path1 = tempdir()
.unwrap()
.into_path()
.keep()
.into_os_string()
.into_string()
.unwrap();
let temp_filename1 = format!("{}/{}", temp_path1, p_name);

let temp_path2 = tempdir()
.unwrap()
.into_path()
.keep()
.into_os_string()
.into_string()
.unwrap();
Expand Down
Loading
Loading