Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ bitflags = "1.2.1"

[features]
default = ["csv", "ipc", "test_utils"]
avx512 = []
csv = ["csv_crate"]
ipc = ["flatbuffers"]
simd = ["packed_simd"]
Expand Down
14 changes: 14 additions & 0 deletions arrow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,17 @@ cargo run --example read_csv
```

[arrow]: https://arrow.apache.org/


## Performance

Most of the compute kernels benefit a lot from being optimized for a specific CPU target.
This is especially so on x86-64, since without specifying a target the compiler can only assume support for SSE2 vector instructions.
Passing one of the following values as `-Ctarget-cpu=value` in `RUSTFLAGS` can therefore improve performance significantly:

- `native`: Target the exact features of the CPU that the build is running on.
This should give the best performance when building and running locally, but should be used carefully, for example when building in a CI pipeline or when shipping pre-compiled software, since the resulting binary may not run on CPUs that lack those features.
- `x86-64-v3`: Includes AVX2 support. It is close to the Intel `haswell` architecture released in 2013 and should be supported by any recent Intel or AMD CPU.
- `x86-64-v4`: Includes AVX512 support, which is available on Intel `skylake` server and `icelake`/`tigerlake`/`rocketlake` laptop and desktop processors.

These flags should be used in addition to the `simd` feature, since they will also affect the code generated by the simd library.
61 changes: 53 additions & 8 deletions arrow/benches/buffer_bit_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@

#[macro_use]
extern crate criterion;
use criterion::Criterion;

use criterion::{Criterion, Throughput};

extern crate arrow;

use arrow::buffer::{Buffer, MutableBuffer};
use arrow::buffer::{
buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer, MutableBuffer,
};

/// Helper function to create arrays
fn create_buffer(size: usize) -> Buffer {
Expand All @@ -42,17 +45,59 @@ fn bench_buffer_or(left: &Buffer, right: &Buffer) {
criterion::black_box((left | right).unwrap());
}

/// Benchmark body: applies the `!` operator to `buffer` and passes the
/// result through `criterion::black_box` so the work is not optimized away.
fn bench_buffer_not(buffer: &Buffer) {
    let negated = !buffer;
    criterion::black_box(negated);
}

/// Benchmark body: calls `buffer_bin_and` on `left` and `right` with the
/// given offsets and length, then passes the result through
/// `criterion::black_box` so the call is not optimized away.
fn bench_buffer_and_with_offsets(
    left: &Buffer,
    left_offset: usize,
    right: &Buffer,
    right_offset: usize,
    len: usize,
) {
    let combined = buffer_bin_and(left, left_offset, right, right_offset, len);
    criterion::black_box(combined);
}

/// Benchmark body: calls `buffer_bin_or` on `left` and `right` with the
/// given offsets and length, then passes the result through
/// `criterion::black_box` so the call is not optimized away.
fn bench_buffer_or_with_offsets(
    left: &Buffer,
    left_offset: usize,
    right: &Buffer,
    right_offset: usize,
    len: usize,
) {
    let combined = buffer_bin_or(left, left_offset, right, right_offset, len);
    criterion::black_box(combined);
}

/// Benchmark body: calls `buffer_unary_not` on `buffer` with the given
/// offset and length, then passes the result through `criterion::black_box`
/// so the call is not optimized away.
fn bench_buffer_not_with_offsets(buffer: &Buffer, offset: usize, len: usize) {
    let negated = buffer_unary_not(buffer, offset, len);
    criterion::black_box(negated);
}

/// Registers the buffer bit-operation benchmarks with Criterion.
///
/// Two input buffers are created once via `create_buffer(512 * 10)` and
/// shared by every benchmark. The benchmarks are organised into a
/// `buffer_binary_ops` group (and / or, with and without offsets) and a
/// `buffer_unary_ops` group (not, with and without offset), each with a
/// byte throughput configured.
fn bit_ops_benchmark(c: &mut Criterion) {
let left = create_buffer(512 * 10);
let right = create_buffer(512 * 10);

// NOTE(review): this standalone benchmark measures the same call as the
// "and" entry of the `buffer_binary_ops` group below — confirm both are intended.
c.bench_function("buffer_bit_ops and", |b| {
b.iter(|| bench_buffer_and(&left, &right))
});
// Binary operations. Throughput is declared as 3x `left.len()`;
// presumably two inputs read plus one output written — TODO confirm.
c.benchmark_group("buffer_binary_ops")
.throughput(Throughput::Bytes(3 * left.len() as u64))
.bench_function("and", |b| b.iter(|| bench_buffer_and(&left, &right)))
.bench_function("or", |b| b.iter(|| bench_buffer_or(&left, &right)))
// The offset variants use different offsets for each side (1 and 2) and a
// length of `left.len() * 8 - 5`; presumably offsets and length are in bits
// while `len()` is in bytes — verify against `buffer_bin_and`/`buffer_bin_or`.
.bench_function("and_with_offset", |b| {
b.iter(|| {
bench_buffer_and_with_offsets(&left, 1, &right, 2, left.len() * 8 - 5)
})
})
.bench_function("or_with_offset", |b| {
b.iter(|| {
bench_buffer_or_with_offsets(&left, 1, &right, 2, left.len() * 8 - 5)
})
});

// NOTE(review): this standalone benchmark measures the same call as the
// "or" entry of the `buffer_binary_ops` group above — confirm both are intended.
c.bench_function("buffer_bit_ops or", |b| {
b.iter(|| bench_buffer_or(&left, &right))
});
// Unary operations. Throughput is declared as 2x `left.len()`;
// presumably one input read plus one output written — TODO confirm.
c.benchmark_group("buffer_unary_ops")
.throughput(Throughput::Bytes(2 * left.len() as u64))
.bench_function("not", |b| b.iter(|| bench_buffer_not(&left)))
.bench_function("not_with_offset", |b| {
b.iter(|| bench_buffer_not_with_offsets(&left, 1, left.len() * 8 - 5))
});
}

criterion_group!(benches, bit_ops_benchmark);
Expand Down
73 changes: 0 additions & 73 deletions arrow/src/arch/avx512.rs

This file was deleted.

22 changes: 0 additions & 22 deletions arrow/src/arch/mod.rs

This file was deleted.

Loading