Skip to content

Commit 3c3faf2

Browse files
committed
Merge remote-tracking branch 'upstream/master' into split-out-arrow-schema
2 parents d963c54 + fb01656 commit 3c3faf2

29 files changed

+946
-674
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
members = [
2020
"arrow",
2121
"arrow-schema",
22+
"arrow-buffer",
23+
"arrow-flight",
2224
"parquet",
2325
"parquet_derive",
2426
"parquet_derive_test",
25-
"arrow-flight",
2627
"integration-testing",
2728
"object_store",
2829
]

arrow-buffer/Cargo.toml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "arrow-buffer"
20+
version = "22.0.0"
21+
description = "Buffer abstractions for Apache Arrow"
22+
homepage = "https://github.com/apache/arrow-rs"
23+
repository = "https://github.com/apache/arrow-rs"
24+
authors = ["Apache Arrow <[email protected]>"]
25+
license = "Apache-2.0"
26+
keywords = ["arrow"]
27+
include = [
28+
"benches/*.rs",
29+
"src/**/*.rs",
30+
"Cargo.toml",
31+
]
32+
edition = "2021"
33+
rust-version = "1.62"
34+
35+
[lib]
36+
name = "arrow_buffer"
37+
path = "src/lib.rs"
38+
bench = false
39+
40+
[dependencies]
41+
num = { version = "0.4", default-features = false, features = ["std"] }
42+
half = { version = "2.0", default-features = false }
43+
44+
[dev-dependencies]
45+
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] }
46+
47+
[build-dependencies]
File renamed without changes.

arrow/src/alloc/mod.rs renamed to arrow-buffer/src/alloc/mod.rs

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,34 +20,29 @@
2020
2121
use std::alloc::{handle_alloc_error, Layout};
2222
use std::fmt::{Debug, Formatter};
23-
use std::mem::size_of;
2423
use std::panic::RefUnwindSafe;
2524
use std::ptr::NonNull;
2625
use std::sync::Arc;
2726

2827
mod alignment;
29-
mod types;
3028

3129
pub use alignment::ALIGNMENT;
32-
pub use types::NativeType;
3330

3431
#[inline]
35-
unsafe fn null_pointer<T: NativeType>() -> NonNull<T> {
36-
NonNull::new_unchecked(ALIGNMENT as *mut T)
32+
unsafe fn null_pointer() -> NonNull<u8> {
33+
NonNull::new_unchecked(ALIGNMENT as *mut u8)
3734
}
3835

3936
/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values.
4037
/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have
4138
/// an unknown or non-zero value and is semantically similar to `malloc`.
42-
pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
39+
pub fn allocate_aligned(size: usize) -> NonNull<u8> {
4340
unsafe {
4441
if size == 0 {
4542
null_pointer()
4643
} else {
47-
let size = size * size_of::<T>();
48-
4944
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
50-
let raw_ptr = std::alloc::alloc(layout) as *mut T;
45+
let raw_ptr = std::alloc::alloc(layout);
5146
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
5247
}
5348
}
@@ -56,15 +51,13 @@ pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> {
5651
/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them.
5752
/// This is more performant than using [allocate_aligned] and setting all bytes to zero
5853
/// and is semantically similar to `calloc`.
59-
pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
54+
pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> {
6055
unsafe {
6156
if size == 0 {
6257
null_pointer()
6358
} else {
64-
let size = size * size_of::<T>();
65-
6659
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
67-
let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T;
60+
let raw_ptr = std::alloc::alloc_zeroed(layout);
6861
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
6962
}
7063
}
@@ -78,9 +71,8 @@ pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> {
7871
/// * ptr must denote a block of memory currently allocated via this allocator,
7972
///
8073
/// * size must be the same size that was used to allocate that block of memory,
81-
pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
74+
pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
8275
if ptr != null_pointer() {
83-
let size = size * size_of::<T>();
8476
std::alloc::dealloc(
8577
ptr.as_ptr() as *mut u8,
8678
Layout::from_size_align_unchecked(size, ALIGNMENT),
@@ -99,13 +91,11 @@ pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) {
9991
///
10092
/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
10193
/// the rounded value must be less than usize::MAX).
102-
pub unsafe fn reallocate<T: NativeType>(
103-
ptr: NonNull<T>,
94+
pub unsafe fn reallocate(
95+
ptr: NonNull<u8>,
10496
old_size: usize,
10597
new_size: usize,
106-
) -> NonNull<T> {
107-
let old_size = old_size * size_of::<T>();
108-
let new_size = new_size * size_of::<T>();
98+
) -> NonNull<u8> {
10999
if ptr == null_pointer() {
110100
return allocate_aligned(new_size);
111101
}
@@ -119,7 +109,7 @@ pub unsafe fn reallocate<T: NativeType>(
119109
ptr.as_ptr() as *mut u8,
120110
Layout::from_size_align_unchecked(old_size, ALIGNMENT),
121111
new_size,
122-
) as *mut T;
112+
);
123113
NonNull::new(raw_ptr).unwrap_or_else(|| {
124114
handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
125115
})

arrow/src/buffer/immutable.rs renamed to arrow-buffer/src/buffer/immutable.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use std::{convert::AsRef, usize};
2323

2424
use crate::alloc::{Allocation, Deallocation};
2525
use crate::util::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
26-
use crate::{bytes::Bytes, datatypes::ArrowNativeType};
26+
use crate::{bytes::Bytes, native::ArrowNativeType};
2727

2828
use super::ops::bitwise_unary_op_helper;
2929
use super::MutableBuffer;
@@ -271,7 +271,7 @@ impl Buffer {
271271
/// Prefer this to `collect` whenever possible, as it is ~60% faster.
272272
/// # Example
273273
/// ```
274-
/// # use arrow::buffer::Buffer;
274+
/// # use arrow_buffer::buffer::Buffer;
275275
/// let v = vec![1u32];
276276
/// let iter = v.iter().map(|x| x * 2);
277277
/// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };

arrow-buffer/src/buffer/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents
19+
//! a contiguous memory region that can be shared via `offsets`.
20+
21+
mod immutable;
22+
pub use immutable::*;
23+
mod mutable;
24+
pub use mutable::*;
25+
mod ops;
26+
mod scalar;
27+
pub use scalar::*;
28+
29+
pub use ops::*;

arrow/src/buffer/mutable.rs renamed to arrow-buffer/src/buffer/mutable.rs

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::alloc::Deallocation;
2020
use crate::{
2121
alloc,
2222
bytes::Bytes,
23-
datatypes::{ArrowNativeType, ToByteSlice},
23+
native::{ArrowNativeType, ToByteSlice},
2424
util::bit_util,
2525
};
2626
use std::ptr::NonNull;
@@ -31,12 +31,12 @@ use std::ptr::NonNull;
3131
/// Use [MutableBuffer::push] to insert an item, [MutableBuffer::extend_from_slice]
3232
/// to insert many items, and `into` to convert it to [`Buffer`].
3333
///
34-
/// For a safe, strongly typed API consider using [`crate::array::BufferBuilder`]
34+
/// For a safe, strongly typed API consider using `arrow::array::BufferBuilder`
3535
///
3636
/// # Example
3737
///
3838
/// ```
39-
/// # use arrow::buffer::{Buffer, MutableBuffer};
39+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
4040
/// let mut buffer = MutableBuffer::new(0);
4141
/// buffer.push(256u32);
4242
/// buffer.extend_from_slice(&[1u32]);
@@ -75,7 +75,7 @@ impl MutableBuffer {
7575
/// all bytes are guaranteed to be `0u8`.
7676
/// # Example
7777
/// ```
78-
/// # use arrow::buffer::{Buffer, MutableBuffer};
78+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
7979
/// let mut buffer = MutableBuffer::from_len_zeroed(127);
8080
/// assert_eq!(buffer.len(), 127);
8181
/// assert!(buffer.capacity() >= 127);
@@ -131,7 +131,7 @@ impl MutableBuffer {
131131
/// `self.len + additional > capacity`.
132132
/// # Example
133133
/// ```
134-
/// # use arrow::buffer::{Buffer, MutableBuffer};
134+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
135135
/// let mut buffer = MutableBuffer::new(0);
136136
/// buffer.reserve(253); // allocates for the first time
137137
/// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation
@@ -171,7 +171,7 @@ impl MutableBuffer {
171171
/// growing it (potentially reallocating it) and writing `value` in the newly available bytes.
172172
/// # Example
173173
/// ```
174-
/// # use arrow::buffer::{Buffer, MutableBuffer};
174+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
175175
/// let mut buffer = MutableBuffer::new(0);
176176
/// buffer.resize(253, 2); // allocates for the first time
177177
/// assert_eq!(buffer.as_slice()[252], 2u8);
@@ -195,7 +195,7 @@ impl MutableBuffer {
195195
///
196196
/// # Example
197197
/// ```
198-
/// # use arrow::buffer::{Buffer, MutableBuffer};
198+
/// # use arrow_buffer::buffer::{Buffer, MutableBuffer};
199199
/// // 2 cache lines
200200
/// let mut buffer = MutableBuffer::new(128);
201201
/// assert_eq!(buffer.capacity(), 128);
@@ -322,7 +322,7 @@ impl MutableBuffer {
322322
/// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed.
323323
/// # Example
324324
/// ```
325-
/// # use arrow::buffer::MutableBuffer;
325+
/// # use arrow_buffer::buffer::MutableBuffer;
326326
/// let mut buffer = MutableBuffer::new(0);
327327
/// buffer.extend_from_slice(&[2u32, 0]);
328328
/// assert_eq!(buffer.len(), 8) // u32 has 4 bytes
@@ -346,7 +346,7 @@ impl MutableBuffer {
346346
/// Extends the buffer with a new item, increasing its capacity if needed.
347347
/// # Example
348348
/// ```
349-
/// # use arrow::buffer::MutableBuffer;
349+
/// # use arrow_buffer::buffer::MutableBuffer;
350350
/// let mut buffer = MutableBuffer::new(0);
351351
/// buffer.push(256u32);
352352
/// assert_eq!(buffer.len(), 4) // u32 has 4 bytes
@@ -384,7 +384,7 @@ impl MutableBuffer {
384384
/// # Safety
385385
/// The caller must ensure that the buffer was properly initialized up to `len`.
386386
#[inline]
387-
pub(crate) unsafe fn set_len(&mut self, len: usize) {
387+
pub unsafe fn set_len(&mut self, len: usize) {
388388
assert!(len <= self.capacity());
389389
self.len = len;
390390
}
@@ -394,16 +394,16 @@ impl MutableBuffer {
394394
/// This is similar to `from_trusted_len_iter_bool`, however, can be significantly faster
395395
/// as it eliminates the conditional `Iterator::next`
396396
#[inline]
397-
pub(crate) fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
398-
let mut buffer = Self::new(bit_util::ceil(len, 8));
397+
pub fn collect_bool<F: FnMut(usize) -> bool>(len: usize, mut f: F) -> Self {
398+
let mut buffer = Self::new(bit_util::ceil(len, 64) * 8);
399399

400-
let chunks = len / 8;
401-
let remainder = len % 8;
400+
let chunks = len / 64;
401+
let remainder = len % 64;
402402
for chunk in 0..chunks {
403403
let mut packed = 0;
404-
for bit_idx in 0..8 {
405-
let i = bit_idx + chunk * 8;
406-
packed |= (f(i) as u8) << bit_idx;
404+
for bit_idx in 0..64 {
405+
let i = bit_idx + chunk * 64;
406+
packed |= (f(i) as u64) << bit_idx;
407407
}
408408

409409
// SAFETY: Already allocated sufficient capacity
@@ -413,14 +413,15 @@ impl MutableBuffer {
413413
if remainder != 0 {
414414
let mut packed = 0;
415415
for bit_idx in 0..remainder {
416-
let i = bit_idx + chunks * 8;
417-
packed |= (f(i) as u8) << bit_idx;
416+
let i = bit_idx + chunks * 64;
417+
packed |= (f(i) as u64) << bit_idx;
418418
}
419419

420420
// SAFETY: Already allocated sufficient capacity
421421
unsafe { buffer.push_unchecked(packed) }
422422
}
423423

424+
buffer.truncate(bit_util::ceil(len, 8));
424425
buffer
425426
}
426427
}
@@ -484,7 +485,7 @@ impl MutableBuffer {
484485
/// Prefer this to `collect` whenever possible, as it is faster ~60% faster.
485486
/// # Example
486487
/// ```
487-
/// # use arrow::buffer::MutableBuffer;
488+
/// # use arrow_buffer::buffer::MutableBuffer;
488489
/// let v = vec![1u32];
489490
/// let iter = v.iter().map(|x| x * 2);
490491
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) };
@@ -525,10 +526,10 @@ impl MutableBuffer {
525526
}
526527

527528
/// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
528-
/// # use arrow::buffer::MutableBuffer;
529+
/// # use arrow_buffer::buffer::MutableBuffer;
529530
/// # Example
530531
/// ```
531-
/// # use arrow::buffer::MutableBuffer;
532+
/// # use arrow_buffer::buffer::MutableBuffer;
532533
/// let v = vec![false, true, false];
533534
/// let iter = v.iter().map(|x| *x || true);
534535
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };

arrow/src/buffer/ops.rs renamed to arrow-buffer/src/buffer/ops.rs

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,26 +20,19 @@ use crate::util::bit_util::ceil;
2020

2121
/// Apply a bitwise operation `op` to four inputs and return the result as a Buffer.
2222
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
23-
#[allow(clippy::too_many_arguments)]
24-
pub(crate) fn bitwise_quaternary_op_helper<F>(
25-
first: &Buffer,
26-
first_offset_in_bits: usize,
27-
second: &Buffer,
28-
second_offset_in_bits: usize,
29-
third: &Buffer,
30-
third_offset_in_bits: usize,
31-
fourth: &Buffer,
32-
fourth_offset_in_bits: usize,
23+
pub fn bitwise_quaternary_op_helper<F>(
24+
buffers: [&Buffer; 4],
25+
offsets: [usize; 4],
3326
len_in_bits: usize,
3427
op: F,
3528
) -> Buffer
3629
where
3730
F: Fn(u64, u64, u64, u64) -> u64,
3831
{
39-
let first_chunks = first.bit_chunks(first_offset_in_bits, len_in_bits);
40-
let second_chunks = second.bit_chunks(second_offset_in_bits, len_in_bits);
41-
let third_chunks = third.bit_chunks(third_offset_in_bits, len_in_bits);
42-
let fourth_chunks = fourth.bit_chunks(fourth_offset_in_bits, len_in_bits);
32+
let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits);
33+
let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits);
34+
let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits);
35+
let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits);
4336

4437
let chunks = first_chunks
4538
.iter()

arrow/src/buffer/scalar.rs renamed to arrow-buffer/src/buffer/scalar.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::buffer::Buffer;
19-
use crate::datatypes::ArrowNativeType;
19+
use crate::native::ArrowNativeType;
2020
use std::ops::Deref;
2121

2222
/// Provides a safe API for interpreting a [`Buffer`] as a slice of [`ArrowNativeType`]

0 commit comments

Comments
 (0)