Skip to content

Commit 48cc8be

Browse files
authored
Split out arrow-schema (#2594) (#2711)
* Split out arrow-schema (#2594)
* Flatten schema
* Move decimal logic
* Fix doc
* Fix tests
* Fix integration-test
* Remove pyarrow orphan
* PyArrow fixes
* Move ArrowError to arrow-schema
* Fix pyarrow
* Fix test
* Fix conflicts
* Fix pyarrow
* Tweak feature flags
* Test juggling
* Derive PyArrowConvert for Vec
1 parent 74f639c commit 48cc8be

File tree

21 files changed

+1625
-1493
lines changed

21 files changed

+1625
-1493
lines changed

.github/workflows/arrow.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ jobs:
6363
cargo run --example read_csv_infer_schema
6464
- name: Run non-archery based integration-tests
6565
run: cargo test -p arrow-integration-testing
66+
- name: Test arrow-schema with all features
67+
run: cargo test -p arrow-schema --all-features
6668

6769
# test compilation features
6870
linux-features:

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
[workspace]
1919
members = [
2020
"arrow",
21+
"arrow-schema",
2122
"arrow-buffer",
2223
"arrow-flight",
2324
"parquet",

arrow-pyarrow-integration-testing/src/lib.rs

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,13 @@ use arrow::compute::kernels;
2828
use arrow::datatypes::{DataType, Field, Schema};
2929
use arrow::error::ArrowError;
3030
use arrow::ffi_stream::ArrowArrayStreamReader;
31-
use arrow::pyarrow::PyArrowConvert;
31+
use arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType};
3232
use arrow::record_batch::RecordBatch;
3333

34+
fn to_py_err(err: ArrowError) -> PyErr {
35+
PyArrowException::new_err(err.to_string())
36+
}
37+
3438
/// Returns `array + array` of an int64 array.
3539
#[pyfunction]
3640
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
@@ -41,8 +45,10 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
4145
let array = array
4246
.as_any()
4347
.downcast_ref::<Int64Array>()
44-
.ok_or(ArrowError::ParseError("Expects an int64".to_string()))?;
45-
let array = kernels::arithmetic::add(array, array)?;
48+
.ok_or_else(|| ArrowError::ParseError("Expects an int64".to_string()))
49+
.map_err(to_py_err)?;
50+
51+
let array = kernels::arithmetic::add(array, array).map_err(to_py_err)?;
4652

4753
// export
4854
array.to_pyarrow(py)
@@ -66,56 +72,61 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
6672

6773
/// Returns the substring
6874
#[pyfunction]
69-
fn substring(array: ArrayData, start: i64) -> PyResult<ArrayData> {
75+
fn substring(
76+
array: PyArrowType<ArrayData>,
77+
start: i64,
78+
) -> PyResult<PyArrowType<ArrayData>> {
7079
// import
71-
let array = ArrayRef::from(array);
80+
let array = ArrayRef::from(array.0);
7281

7382
// substring
74-
let array = kernels::substring::substring(array.as_ref(), start, None)?;
83+
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
7584

76-
Ok(array.data().to_owned())
85+
Ok(array.data().to_owned().into())
7786
}
7887

7988
/// Returns the concatenate
8089
#[pyfunction]
81-
fn concatenate(array: ArrayData, py: Python) -> PyResult<PyObject> {
82-
let array = ArrayRef::from(array);
90+
fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject> {
91+
let array = ArrayRef::from(array.0);
8392

8493
// concat
85-
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?;
94+
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
8695

8796
array.to_pyarrow(py)
8897
}
8998

9099
#[pyfunction]
91-
fn round_trip_type(obj: DataType) -> PyResult<DataType> {
100+
fn round_trip_type(obj: PyArrowType<DataType>) -> PyResult<PyArrowType<DataType>> {
92101
Ok(obj)
93102
}
94103

95104
#[pyfunction]
96-
fn round_trip_field(obj: Field) -> PyResult<Field> {
105+
fn round_trip_field(obj: PyArrowType<Field>) -> PyResult<PyArrowType<Field>> {
97106
Ok(obj)
98107
}
99108

100109
#[pyfunction]
101-
fn round_trip_schema(obj: Schema) -> PyResult<Schema> {
110+
fn round_trip_schema(obj: PyArrowType<Schema>) -> PyResult<PyArrowType<Schema>> {
102111
Ok(obj)
103112
}
104113

105114
#[pyfunction]
106-
fn round_trip_array(obj: ArrayData) -> PyResult<ArrayData> {
115+
fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayData>> {
107116
Ok(obj)
108117
}
109118

110119
#[pyfunction]
111-
fn round_trip_record_batch(obj: RecordBatch) -> PyResult<RecordBatch> {
120+
fn round_trip_record_batch(
121+
obj: PyArrowType<RecordBatch>,
122+
) -> PyResult<PyArrowType<RecordBatch>> {
112123
Ok(obj)
113124
}
114125

115126
#[pyfunction]
116127
fn round_trip_record_batch_reader(
117-
obj: ArrowArrayStreamReader,
118-
) -> PyResult<ArrowArrayStreamReader> {
128+
obj: PyArrowType<ArrowArrayStreamReader>,
129+
) -> PyResult<PyArrowType<ArrowArrayStreamReader>> {
119130
Ok(obj)
120131
}
121132

arrow-schema/Cargo.toml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "arrow-schema"
20+
version = "23.0.0"
21+
description = "Defines the logical types for arrow arrays"
22+
homepage = "https://github.com/apache/arrow-rs"
23+
repository = "https://github.com/apache/arrow-rs"
24+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
25+
license = "Apache-2.0"
26+
keywords = ["arrow"]
27+
include = [
28+
"benches/*.rs",
29+
"src/**/*.rs",
30+
"Cargo.toml",
31+
]
32+
edition = "2021"
33+
rust-version = "1.62"
34+
35+
[lib]
36+
name = "arrow_schema"
37+
path = "src/lib.rs"
38+
bench = false
39+
40+
[dependencies]
41+
serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true }
42+
43+
[features]
44+
default = []
45+
46+
[dev-dependencies]
47+
serde_json = "1.0"

0 commit comments

Comments
 (0)