Skip to content

Commit b26a5c2

Browse files
committed
Update
1 parent 17be62a commit b26a5c2

File tree

3 files changed

+58
-43
lines changed

3 files changed

+58
-43
lines changed

common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,12 @@ public List<ArrowBuf> visit(ArrowType.LargeUtf8 type) {
260260
type,
261261
start,
262262
end);
263-
final long len = end - start;
263+
// HACK: For the issue https://github.com/apache/datafusion-comet/issues/540
264+
// As Arrow Java doesn't support `offset` in the C Data Interface, we cannot correctly import
265+
// a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again.
266+
// So we add this hack to always take full length of data buffer by assuming the first offset
267+
// is always 0 which is true for Arrow Java and arrow-rs.
268+
final long len = end;
264269
offsets.getReferenceManager().retain();
265270
return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len));
266271
}
@@ -277,7 +282,12 @@ public List<ArrowBuf> visit(ArrowType.Binary type) {
277282
type,
278283
start,
279284
end);
280-
final int len = end - start;
285+
// HACK: For the issue https://github.com/apache/datafusion-comet/issues/540
286+
// As Arrow Java doesn't support `offset` in the C Data Interface, we cannot correctly import
287+
// a slice of binary data from arrow-rs to Java Arrow and then export it to arrow-rs again.
288+
// So we add this hack to always take full length of data buffer by assuming the first offset
289+
// is always 0 which is true for Arrow Java and arrow-rs.
290+
final int len = end;
281291
offsets.getReferenceManager().retain();
282292
return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len));
283293
}
@@ -296,7 +306,12 @@ public List<ArrowBuf> visit(ArrowType.LargeBinary type) {
296306
type,
297307
start,
298308
end);
299-
final long len = end - start;
309+
// HACK: For the issue https://github.com/apache/datafusion-comet/issues/540
310+
// As Arrow Java doesn't support `offset` in the C Data Interface, we cannot correctly import
311+
// a slice of binary data from arrow-rs to Java Arrow and then export it to arrow-rs again.
312+
// So we add this hack to always take full length of data buffer by assuming the first offset
313+
// is always 0 which is true for Arrow Java and arrow-rs.
314+
final long len = end;
300315
offsets.getReferenceManager().retain();
301316
return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len));
302317
}

core/Cargo.lock

Lines changed: 27 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/Cargo.toml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ include = [
3333

3434
[dependencies]
3535
parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate
36-
arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd", features = ["prettyprint", "ffi", "chrono-tz"] }
37-
arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" }
38-
arrow-buffer = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" }
39-
arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" }
40-
arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" }
41-
arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" }
42-
parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd", default-features = false, features = ["experimental"] }
36+
arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f", features = ["prettyprint", "ffi", "chrono-tz"] }
37+
arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f" }
38+
arrow-buffer = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f" }
39+
arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f" }
40+
arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f" }
41+
arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f" }
42+
parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "5aa7e7f", default-features = false, features = ["experimental"] }
4343
half = { version = "2.4.1", default-features = false }
4444
futures = "0.3.28"
4545
mimalloc = { version = "*", default-features = false, optional = true }
@@ -71,12 +71,12 @@ itertools = "0.11.0"
7171
chrono = { version = "0.4", default-features = false, features = ["clock"] }
7272
chrono-tz = { version = "0.8" }
7373
paste = "1.0.14"
74-
datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1" }
75-
datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", features = ["unicode_expressions", "crypto_expressions"] }
76-
datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", features = ["crypto_expressions"] }
77-
datafusion-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false }
78-
datafusion-physical-expr-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false }
79-
datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false }
74+
datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579" }
75+
datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579", features = ["unicode_expressions", "crypto_expressions"] }
76+
datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579", features = ["crypto_expressions"] }
77+
datafusion-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579", default-features = false }
78+
datafusion-physical-expr-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579", default-features = false }
79+
datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "bad5579", default-features = false }
8080
unicode-segmentation = "^1.10.1"
8181
once_cell = "1.18.0"
8282
regex = "1.9.6"

0 commit comments

Comments
 (0)