Skip to content

Bump arrow to 16.0.0 #1769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
460 changes: 439 additions & 21 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/benchmarks/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"private": true,
"dependencies": {
"@duckdb/duckdb-wasm": "file:../duckdb-wasm",
"apache-arrow": "^15.0.0",
"apache-arrow": "^16.0.0",
"apache-arrow-3": "npm:apache-arrow@^3.0.0",
"arquero": "^5.4.0",
"buffalo-bench": "^2.0.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/duckdb-wasm-app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"dependencies": {
"@duckdb/duckdb-wasm": "file:../duckdb-wasm",
"@duckdb/duckdb-wasm-shell": "file:../duckdb-wasm-shell",
"apache-arrow": "^15.0.0",
"apache-arrow": "^16.0.0",
"bootstrap": "^5.3.3",
"classnames": "^2.5.1",
"framer-motion": "^11.0.3",
Expand Down
2 changes: 1 addition & 1 deletion packages/duckdb-wasm-shell/crate/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ wasm-bindgen = { version = "0.2.74", features = ["serde-serialize"] }
wasm-bindgen-futures = "0.4.24"
chrono = "0.4.19"
js-sys = "0.3.51"
arrow = { version = "13.0.0", features = ["csv", "ipc"] }
arrow = { version = "52.0.0", features = ["csv", "ipc"] }
lazy_static = "1.4.0"
encode_unicode = "0.3.6"
unicode-width = "0.1.8"
Expand Down
2 changes: 1 addition & 1 deletion packages/duckdb-wasm-shell/crate/src/arrow_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn get_column_alignment(column: &arrow::array::ArrayRef) -> comfy::CellAlignment
| DataType::Float16
| DataType::Float32
| DataType::Float64
| DataType::Decimal(_, _) => comfy::CellAlignment::Right,
| DataType::Decimal128(_, _) => comfy::CellAlignment::Right,
_ => comfy::CellAlignment::Left,
}
}
Expand Down
30 changes: 19 additions & 11 deletions packages/duckdb-wasm-shell/crate/src/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use arrow::datatypes::SchemaRef;
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;
use std::sync::Arc;
use std::collections::HashMap;

const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];

Expand All @@ -47,6 +48,7 @@ impl<'buf> Reader<'buf> {
if available < n {
Err(arrow::error::ArrowError::IoError(
"insufficient bytes available".to_string(),
std::io::Error::new(std::io::ErrorKind::Other,"")
))
} else {
let result = &self.buffer[self.position..(self.position + n)];
Expand All @@ -65,7 +67,7 @@ pub struct ArrowStreamReader {
/// Optional dictionaries for each schema field.
///
/// Dictionaries may be appended to in the streaming format.
dictionaries_by_field: Vec<Option<ArrayRef>>,
dictionaries_by_field: HashMap<i64, ArrayRef>,
/// An indicator of whether the stream is complete.
///
/// This value is set to `true` the first time the reader's `next()` returns `None`.
Expand Down Expand Up @@ -95,16 +97,16 @@ impl ArrowStreamReader {

let meta_buffer = reader.next(meta_len as usize)?;
let message = arrow::ipc::root_as_message(meta_buffer).map_err(|err| {
ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
ArrowError::IoError(format!("Unable to get root as message: {:?}", err),std::io::Error::new(std::io::ErrorKind::Other,""))
})?;
// message header is a Schema, so read it
let ipc_schema: arrow::ipc::Schema = message.header_as_schema().ok_or_else(|| {
ArrowError::IoError("Unable to read IPC message as schema".to_string())
ArrowError::IoError("Unable to read IPC message as schema".to_string(),std::io::Error::new(std::io::ErrorKind::Other,""))
})?;
let schema = arrow::ipc::convert::fb_to_schema(ipc_schema);

// Create an array of optional dictionary value arrays, one per field.
let dictionaries_by_field = vec![None; schema.fields().len()];
let dictionaries_by_field = HashMap::new();

Ok(Self {
schema: Arc::new(schema),
Expand Down Expand Up @@ -157,39 +159,45 @@ impl ArrowStreamReader {

let meta_buffer = reader.next(meta_len as usize)?;
let message = arrow::ipc::root_as_message(meta_buffer).map_err(|err| {
ArrowError::IoError(format!("Unable to get root as message: {:?}", err))
ArrowError::IoError(format!("Unable to get root as message: {:?}", err),std::io::Error::new(std::io::ErrorKind::Other,""))
})?;

match message.header_type() {
arrow::ipc::MessageHeader::Schema => Err(ArrowError::IoError(
"Not expecting a schema when messages are read".to_string(),
)),
std::io::Error::new(std::io::ErrorKind::Other,""))),
arrow::ipc::MessageHeader::RecordBatch => {
let batch = message.header_as_record_batch().ok_or_else(|| {
ArrowError::IoError("Unable to read IPC message as record batch".to_string())
ArrowError::IoError("Unable to read IPC message as record batch".to_string(),std::io::Error::new(std::io::ErrorKind::Other,""))
})?;
let metadata = arrow::ipc::gen::Schema::MetadataVersion(1);
let buf = reader.next(message.bodyLength() as usize)?;
let buf1 = arrow::buffer::Buffer::from_slice_ref(buf);
arrow::ipc::reader::read_record_batch(
&buf,
&buf1,
batch,
self.schema(),
&self.dictionaries_by_field,
None,
&metadata
)
.map(Some)
}
arrow::ipc::MessageHeader::DictionaryBatch => {
let batch = message.header_as_dictionary_batch().ok_or_else(|| {
ArrowError::IoError(
"Unable to read IPC message as dictionary batch".to_string(),
"Unable to read IPC message as dictionary batch".to_string(),std::io::Error::new(std::io::ErrorKind::Other,"")
)
})?;
let buf = reader.next(message.bodyLength() as usize)?;
let buf1 = arrow::buffer::Buffer::from_slice_ref(buf);
let metadata = arrow::ipc::gen::Schema::MetadataVersion(1);
arrow::ipc::reader::read_dictionary(
&buf,
&buf1,
batch,
&self.schema,
&mut self.dictionaries_by_field,
&metadata
)?;

// read the next message until we encounter a RecordBatch
Expand All @@ -199,7 +207,7 @@ impl ArrowStreamReader {
t => Err(ArrowError::IoError(format!(
"Reading types other than record batches not yet supported, unable to read {:?} ",
t
))),
), std::io::Error::new(std::io::ErrorKind::Other,""))),
}
}
}
2 changes: 1 addition & 1 deletion packages/duckdb-wasm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"csv"
],
"dependencies": {
"apache-arrow": "^15.0.0"
"apache-arrow": "^16.0.0"
},
"devDependencies": {
"@types/emscripten": "^1.39.10",
Expand Down
4 changes: 2 additions & 2 deletions packages/duckdb-wasm/test/regression/github_393.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export function test393(db: () => duckdb.AsyncDuckDB): void {
const resultWithoutCast = await conn.query<{
ts: arrow.TimestampMillisecond;
}>(`SELECT TIMESTAMP '1992-03-22 01:02:03' as ts`);
expect(resultWithoutCast.toArray()[0]?.ts).toEqual(new Date(Date.UTC(1992, 2, 22, 1, 2, 3)).getTime());
expect(new Date(resultWithoutCast.toArray()[0]?.ts)).toEqual(new Date(Date.UTC(1992, 2, 22, 1, 2, 3)));

await db().open({
path: ':memory:',
Expand All @@ -39,7 +39,7 @@ export function test393(db: () => duckdb.AsyncDuckDB): void {
const resultWithCast = await conn.query<{
ts: arrow.DateMillisecond;
}>(`SELECT TIMESTAMP '1992-03-22 01:02:03' as ts`);
expect(resultWithCast.toArray()[0]?.ts).toEqual(new Date(Date.UTC(1992, 2, 22, 1, 2, 3)));
expect(new Date(resultWithCast.toArray()[0]?.ts)).toEqual(new Date(Date.UTC(1992, 2, 22, 1, 2, 3)));
});
});
}
41 changes: 23 additions & 18 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@
"@duckdb/duckdb-wasm@file:packages/duckdb-wasm":
version "1.11.0"
dependencies:
apache-arrow "^15.0.0"
apache-arrow "^16.0.0"

"@emotion/is-prop-valid@^0.8.2":
version "0.8.8"
Expand Down Expand Up @@ -889,10 +889,10 @@
dependencies:
tslib "^2.4.0"

"@swc/helpers@^0.5.2":
version "0.5.3"
resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.3.tgz#98c6da1e196f5f08f977658b80d6bd941b5f294f"
integrity sha512-FaruWX6KdudYloq1AHD/4nU+UsMTdNE8CKyrseXWEcgjDAbvkwJg2QGPAnfIJLIWsjZOSPLOAykK6fuYp4vp4A==
"@swc/helpers@^0.5.10":
version "0.5.11"
resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7"
integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A==
dependencies:
tslib "^2.4.0"

Expand Down Expand Up @@ -921,7 +921,7 @@
resolved "https://registry.yarnpkg.com/@types/command-line-args/-/command-line-args-5.2.0.tgz#adbb77980a1cc376bb208e3f4142e907410430f6"
integrity sha512-UuKzKpJJ/Ief6ufIaIzr3A/0XnluX7RvFgwkV89Yzvm77wCh1kFaFmqN8XEnGcN62EuHdedQjEMb8mYxFLGPyA==

"@types/command-line-args@^5.2.1":
"@types/command-line-args@^5.2.3":
version "5.2.3"
resolved "https://registry.yarnpkg.com/@types/command-line-args/-/command-line-args-5.2.3.tgz#553ce2fd5acf160b448d307649b38ffc60d39639"
integrity sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==
Expand All @@ -931,7 +931,7 @@
resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.2.tgz#ba5e3f6ae5a2009d466679cc431b50635bf1a064"
integrity sha512-n7RlEEJ+4x4TS7ZQddTmNSxP+zziEG0TNsMfiRIxcIVXt71ENJ9ojeXmGO3wPoTdn7pJcU2xc3CJYMktNT6DPg==

"@types/command-line-usage@^5.0.2":
"@types/command-line-usage@^5.0.4":
version "5.0.4"
resolved "https://registry.yarnpkg.com/@types/command-line-usage/-/command-line-usage-5.0.4.tgz#374e4c62d78fbc5a670a0f36da10235af879a0d5"
integrity sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==
Expand Down Expand Up @@ -1090,7 +1090,7 @@
dependencies:
"@types/node" "*"

"@types/node@*", "@types/node@>=10.0.0", "@types/node@^20.12.7", "@types/node@^20.6.0":
"@types/node@*", "@types/node@>=10.0.0", "@types/node@^20.12.7":
version "20.12.7"
resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.7.tgz#04080362fa3dd6c5822061aa3124f5c152cff384"
integrity sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg==
Expand Down Expand Up @@ -1714,18 +1714,18 @@ apache-arrow@^14.0.2:
pad-left "^2.1.0"
tslib "^2.5.3"

apache-arrow@^15.0.0:
version "15.0.0"
resolved "https://registry.yarnpkg.com/apache-arrow/-/apache-arrow-15.0.0.tgz#d1dc537dd64e4180ff22f7bedbf3fb6cbf2502d8"
integrity sha512-e6aunxNKM+woQf137ny3tp/xbLjFJS2oGQxQhYGqW6dGeIwNV1jOeEAeR6sS2jwAI2qLO83gYIP2MBz02Gw5Xw==
apache-arrow@^16.0.0:
version "16.1.0"
resolved "https://registry.yarnpkg.com/apache-arrow/-/apache-arrow-16.1.0.tgz#7aa8d0d436dd0995d9dc5c36febf380d5b207209"
integrity sha512-G6GiM6tzPDdGnKUnVkvVr1Nt5+hUaCMBISiasMSiJwI5L5GKDv5Du7Avc2kxlFfB/LEK2LTqh2GKSxutMdf8vQ==
dependencies:
"@swc/helpers" "^0.5.2"
"@types/command-line-args" "^5.2.1"
"@types/command-line-usage" "^5.0.2"
"@types/node" "^20.6.0"
"@swc/helpers" "^0.5.10"
"@types/command-line-args" "^5.2.3"
"@types/command-line-usage" "^5.0.4"
"@types/node" "^20.12.7"
command-line-args "^5.2.1"
command-line-usage "^7.0.1"
flatbuffers "^23.5.26"
flatbuffers "^24.3.25"
json-bignum "^0.0.3"
tslib "^2.6.2"

Expand Down Expand Up @@ -3754,11 +3754,16 @@ flatbuffers@1.11.0:
resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-1.11.0.tgz#90a47e584dd7851ad7a913f5a0ee99c1d76ce59f"
integrity sha512-0PqFKtXI4MjxomI7jO4g5XfLPm/15g2R+5WGCHBGYGh0ihQiypnHlJ6bMmkkrAe0GzZ4d7PDAfCONKIPUxNF+A==

flatbuffers@23.5.26, flatbuffers@^23.5.26:
flatbuffers@23.5.26:
version "23.5.26"
resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-23.5.26.tgz#01358e272a61239f0faf3bfbe4e014f3ace9d746"
integrity sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==

flatbuffers@^24.3.25:
version "24.3.25"
resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-24.3.25.tgz#e2f92259ba8aa53acd0af7844afb7c7eb95e7089"
integrity sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==

flatted@^3.1.0:
version "3.2.2"
resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.2.2.tgz#64bfed5cb68fe3ca78b3eb214ad97b63bedce561"
Expand Down