Skip to content

Variant shredding #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7b7aad2
Upgrade tonic dependencies to 0.13.0 version (try 2) (#7839)
alamb Jul 16, 2025
0055f57
[Variant] Reserve capacity beforehand during large object building (#…
friendlymatthew Jul 16, 2025
7af62d5
[Variant] Support appending complex variants in `VariantBuilder` (#7914)
friendlymatthew Jul 16, 2025
d4c0a32
[Variant] Add `variant_get` compute kernel (#7919)
Samyak2 Jul 16, 2025
03a837e
Add tests for `BatchCoalescer::push_batch_with_filter`, fix bug (#7774)
alamb Jul 16, 2025
d809f19
[Variant] Add documentation, tests and cleaner api for Variant::get_p…
alamb Jul 17, 2025
7089786
[Variant] Avoid collecting offset iterator (#7934)
codephage2020 Jul 17, 2025
dfe907f
Minor: Support BinaryView and StringView builders in `make_builder` (…
kylebarron Jul 17, 2025
d0fa24e
[Variant] Impl `PartialEq` for VariantObject (#7943)
friendlymatthew Jul 17, 2025
233dad3
Optimize partition_validity function used in sort kernels (#7937)
jhorstmann Jul 18, 2025
722ef59
[Variant] Add ObjectBuilder::with_field for convenience (#7950)
alamb Jul 18, 2025
a984ca7
[Variant] Adding code to store metadata and value references in Varia…
abacef Jul 18, 2025
a5afda2
[Variant] VariantMetadata is allowed to contain the empty string (#7956)
scovich Jul 18, 2025
71dd48e
[Variant] Add `variant_kernels` benchmark (#7944)
alamb Jul 18, 2025
a15f345
[Variant] Add ListBuilder::with_value for convenience (#7959)
codephage2020 Jul 18, 2025
4f5ab12
[Test] Add tests for VariantList equality (#7953)
alamb Jul 18, 2025
55fbf5c
[Variant] remove VariantMetadata::dictionary_size (#7958)
codephage2020 Jul 18, 2025
99eb1bc
Add missing `parquet-variant-compute` crate to CI jobs (#7963)
alamb Jul 18, 2025
08353cb
[ADD] Path-based field extraction for VariantArray
carpecodeum Jul 16, 2025
aeccf2b
[FIX] sanitise variant_array file
carpecodeum Jul 16, 2025
314a599
[ADD] add hybrid approach for field access
carpecodeum Jul 16, 2025
de9d386
[FIX] fix variant_array implementation
carpecodeum Jul 16, 2025
8e4b034
[ADD] add support for path operations on different data types
carpecodeum Jul 16, 2025
be50708
[FIX] minor fixes
carpecodeum Jul 16, 2025
c712747
[FIX] fix formatting issues
carpecodeum Jul 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/arrow_flight.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
cargo test -p arrow-flight --all-features
- name: Test --examples
run: |
cargo test -p arrow-flight --features=flight-sql,tls --examples
cargo test -p arrow-flight --features=flight-sql,tls-ring --examples

vendor:
name: Verify Vendored Code
Expand Down
16 changes: 12 additions & 4 deletions .github/workflows/parquet-variant.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ on:
pull_request:
paths:
- parquet-variant/**
- parquet-variant-json/**
- parquet-variant-compute/**
- .github/**

jobs:
Expand All @@ -50,6 +52,8 @@ jobs:
run: cargo test -p parquet-variant
- name: Test parquet-variant-json
run: cargo test -p parquet-variant-json
- name: Test parquet-variant-compute
run: cargo test -p parquet-variant-compute

# test compilation
linux-features:
Expand All @@ -63,10 +67,12 @@ jobs:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
- name: Check compilation
- name: Check compilation (parquet-variant)
run: cargo check -p parquet-variant
- name: Check compilation
- name: Check compilation (parquet-variant-json)
run: cargo check -p parquet-variant-json
- name: Check compilation (parquet-variant-compute)
run: cargo check -p parquet-variant-compute

clippy:
name: Clippy
Expand All @@ -79,7 +85,9 @@ jobs:
uses: ./.github/actions/setup-builder
- name: Setup Clippy
run: rustup component add clippy
- name: Run clippy
- name: Run clippy (parquet-variant)
run: cargo clippy -p parquet-variant --all-targets --all-features -- -D warnings
- name: Run clippy
- name: Run clippy (parquet-variant-json)
run: cargo clippy -p parquet-variant-json --all-targets --all-features -- -D warnings
- name: Run clippy (parquet-variant-compute)
run: cargo clippy -p parquet-variant-compute --all-targets --all-features -- -D warnings
2 changes: 2 additions & 0 deletions arrow-array/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(capacity)),
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
Expand All @@ -464,6 +465,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
),
DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(capacity)),
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
DataType::Time32(TimeUnit::Second) => {
Expand Down
18 changes: 11 additions & 7 deletions arrow-flight/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ prost = { version = "0.13.1", default-features = false, features = ["prost-deriv
# For Timestamp type
prost-types = { version = "0.13.1", default-features = false }
tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"], optional = true }
tonic = { version = "0.12.3", default-features = false, features = ["transport", "codegen", "prost"] }
tonic = { version = "0.13", default-features = false, features = ["transport", "codegen", "prost", "router"] }

# CLI-related dependencies
anyhow = { version = "1.0", optional = true }
Expand All @@ -64,9 +64,13 @@ default = []
flight-sql = ["dep:arrow-arith", "dep:arrow-data", "dep:arrow-ord", "dep:arrow-row", "dep:arrow-select", "dep:arrow-string", "dep:once_cell", "dep:paste"]
# TODO: Remove in the next release
flight-sql-experimental = ["flight-sql"]
tls = ["tonic/tls"]
tls-aws-lc= ["tonic/tls-aws-lc"]
tls-native-roots = ["tonic/tls-native-roots"]
tls-ring = ["tonic/tls-ring"]
tls-webpki-roots = ["tonic/tls-webpki-roots"]

# Enable CLI tools
cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber"]
cli = ["arrow-array/chrono-tz", "arrow-cast/prettyprint", "tonic/tls-webpki-roots", "dep:anyhow", "dep:clap", "dep:tracing-log", "dep:tracing-subscriber", "dep:tokio"]

[dev-dependencies]
arrow-cast = { workspace = true, features = ["prettyprint"] }
Expand All @@ -85,18 +89,18 @@ uuid = { version = "1.10.0", features = ["v4"] }

[[example]]
name = "flight_sql_server"
required-features = ["flight-sql", "tls"]
required-features = ["flight-sql", "tls-ring"]

[[bin]]
name = "flight_sql_client"
required-features = ["cli", "flight-sql", "tls"]
required-features = ["cli", "flight-sql", "tls-ring"]

[[test]]
name = "flight_sql_client"
path = "tests/flight_sql_client.rs"
required-features = ["flight-sql", "tls"]
required-features = ["flight-sql", "tls-ring"]

[[test]]
name = "flight_sql_client_cli"
path = "tests/flight_sql_client_cli.rs"
required-features = ["cli", "flight-sql", "tls"]
required-features = ["cli", "flight-sql", "tls-ring"]
9 changes: 8 additions & 1 deletion arrow-flight/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,14 @@ that demonstrate how to build a Flight server implemented with [tonic](https://d

- `flight-sql`: Support for [Apache Arrow FlightSQL], a protocol for interacting with SQL databases.

- `tls`: Enables `tls` on `tonic`
You can enable TLS using the following features (not enabled by default):

- `tls-aws-lc`: enables [tonic feature] `tls-aws-lc`
- `tls-native-roots`: enables [tonic feature] `tls-native-roots`
- `tls-ring`: enables [tonic feature] `tls-ring`
- `tls-webpki-roots`: enables [tonic feature] `tls-webpki-roots`

[tonic feature]: https://docs.rs/tonic/latest/tonic/#feature-flags

## CLI

Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/examples/flight_sql_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ mod tests {
// Test helper: binds a TCP listener and returns the `TcpIncoming` built from it
// together with the socket address the listener ended up on.
async fn bind_tcp() -> (TcpIncoming, SocketAddr) {
// Port 0 lets the OS choose a free port; the real address is read back below.
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
let addr = listener.local_addr().unwrap();
// NOTE(review): the two `let incoming` lines below appear to be the removed and
// added sides of this one-line change (diff markers were lost in rendering);
// the `from(..).with_nodelay(..)` form is presumably the one kept — confirm.
let incoming = TcpIncoming::from_listener(listener, true, None).unwrap();
let incoming = TcpIncoming::from(listener).with_nodelay(Some(true));
(incoming, addr)
}

Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/gen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ publish = false
# Pin specific version of the tonic-build dependencies to avoid auto-generated
# (and checked in) arrow.flight.protocol.rs from changing
prost-build = { version = "=0.13.5", default-features = false }
tonic-build = { version = "=0.12.3", default-features = false, features = ["transport", "prost"] }
tonic-build = { version = "=0.13.1", default-features = false, features = ["transport", "prost"] }
14 changes: 8 additions & 6 deletions arrow-flight/src/arrow.flight.protocol.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion arrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ prost = { version = "0.13", default-features = false }
serde = { version = "1.0", default-features = false, features = ["rc", "derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
tokio = { version = "1.0", default-features = false, features = [ "rt-multi-thread"] }
tonic = { version = "0.12", default-features = false }
tonic = { version = "0.13", default-features = false }
tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true }
flate2 = { version = "1", default-features = false, features = ["rust_backend"] }

Expand Down
40 changes: 34 additions & 6 deletions arrow-ord/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,41 @@ where

// Partition the indices of `array` into valid and null indices.
//
// Returns `(valid_indices, null_indices)`. Indices are produced in ascending
// order and stored as `u32`.
fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
// NOTE(review): diff markers were lost in this rendering. The next six lines
// look like the removed implementation (the hunk header reports 6 deletions);
// the replacement body starts at `let len = array.len();`.
match array.null_count() {
// faster path
0 => ((0..(array.len() as u32)).collect(), vec![]),
_ => {
let indices = 0..(array.len() as u32);
indices.partition(|index| array.is_valid(*index as usize))
let len = array.len();
let null_count = array.null_count();
match array.nulls() {
// Only take the partitioning path when a null buffer exists and it
// actually reports at least one null.
Some(nulls) if null_count > 0 => {
// Both output sizes are known up front, so allocate each vector exactly once.
let mut valid_indices = Vec::with_capacity(len - null_count);
let mut null_indices = Vec::with_capacity(null_count);

// Write into the reserved-but-uninitialized tail of each vector;
// indexing these slices is still bounds-checked.
let valid_slice = valid_indices.spare_capacity_mut();
let null_slice = null_indices.spare_capacity_mut();
let mut valid_idx = 0;
let mut null_idx = 0;

// Walk the validity bits once, sending each position to the matching output.
nulls.into_iter().enumerate().for_each(|(i, v)| {
if v {
valid_slice[valid_idx].write(i as u32);
valid_idx += 1;
} else {
null_slice[null_idx].write(i as u32);
null_idx += 1;
}
});

// Verify the number of written elements against the reported counts
// before the lengths are set in the unsafe block below.
assert_eq!(null_idx, null_count);
assert_eq!(valid_idx, len - null_count);
// Safety: The new lengths match the initial capacity as asserted above,
// the bounds checks while writing also ensure they are less than or equal to the capacity.
unsafe {
valid_indices.set_len(valid_idx);
null_indices.set_len(null_idx);
}

(valid_indices, null_indices)
}
// faster path
// (no null buffer, or a null count of zero: every index is valid)
_ => ((0..(len as u32)).collect(), vec![]),
}
}

Expand Down
Loading
Loading