Skip to content

Commit 4f84c26

Browse files
authored
add json_as_text for ->> operator (#32)
1 parent 175dba5 commit 4f84c26

12 files changed

+130
-24
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ repos:
1212

1313
- repo: local
1414
hooks:
15-
- id: format-check
16-
name: Format Check
15+
- id: format
16+
name: Format
1717
entry: cargo fmt
1818
types: [rust]
1919
language: system

README.md

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@ To register the below JSON functions in your `SessionContext`.
1717

1818
## Done
1919

20-
* [x] `json_contains(json: str, *keys: str | int) -> bool` - true if a JSON object has a specific key
21-
* [x] `json_get(json: str, *keys: str | int) -> JsonUnion` - Get a value from a JSON object by its "path"
22-
* [x] `json_get_str(json: str, *keys: str | int) -> str` - Get a string value from a JSON object by its "path"
23-
* [x] `json_get_int(json: str, *keys: str | int) -> int` - Get an integer value from a JSON object by its "path"
24-
* [x] `json_get_float(json: str, *keys: str | int) -> float` - Get a float value from a JSON object by its "path"
25-
* [x] `json_get_bool(json: str, *keys: str | int) -> bool` - Get a boolean value from a JSON object by its "path"
26-
* [x] `json_get_json(json: str, *keys: str | int) -> str` - Get any value from a JSON object by its "path", represented as a string
27-
* [x] `json_length(json: str, *keys: str | int) -> int` - get the length of a JSON object or array
20+
* [x] `json_contains(json: str, *keys: str | int) -> bool` - true if a JSON string has a specific key (used for the `?` operator)
21+
* [x] `json_get(json: str, *keys: str | int) -> JsonUnion` - Get a value from a JSON string by its "path"
22+
* [x] `json_get_str(json: str, *keys: str | int) -> str` - Get a string value from a JSON string by its "path"
23+
* [x] `json_get_int(json: str, *keys: str | int) -> int` - Get an integer value from a JSON string by its "path"
24+
* [x] `json_get_float(json: str, *keys: str | int) -> float` - Get a float value from a JSON string by its "path"
25+
* [x] `json_get_bool(json: str, *keys: str | int) -> bool` - Get a boolean value from a JSON string by its "path"
26+
* [x] `json_get_json(json: str, *keys: str | int) -> str` - Get a nested raw JSON string from a JSON string by its "path"
27+
* [x] `json_as_text(json: str, *keys: str | int) -> str` - Get any value from a JSON string by its "path", represented as a string (used for the `->>` operator)
28+
* [x] `json_length(json: str, *keys: str | int) -> int` - get the length of an object or array in a JSON string
2829

2930
Cast expressions with `json_get` are rewritten to the appropriate method, e.g.
3031

@@ -38,7 +39,7 @@ select * from foo where json_get_str(attributes, 'bar')='ham'
3839

3940
## TODO (maybe, if they're actually useful)
4041

41-
* [ ] `json_keys(json: str, *keys: str | int) -> list[str]` - get the keys of a JSON object
42+
* [ ] `json_keys(json: str, *keys: str | int) -> list[str]` - get the keys of an object in a JSON string
4243
* [ ] `json_is_obj(json: str, *keys: str | int) -> bool` - true if the JSON is an object
4344
* [ ] `json_is_array(json: str, *keys: str | int) -> bool` - true if the JSON is an array
4445
* [ ] `json_valid(json: str) -> bool` - true if the JSON is valid

src/json_as_text.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use std::any::Any;
2+
use std::sync::Arc;
3+
4+
use crate::common::{check_args, get_err, invoke, jiter_json_find, GetError, JsonPath};
5+
use crate::common_macros::make_udf_function;
6+
use arrow::array::{ArrayRef, StringArray};
7+
use arrow_schema::DataType;
8+
use datafusion_common::{Result as DataFusionResult, ScalarValue};
9+
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
10+
use jiter::Peek;
11+
12+
make_udf_function!(
13+
JsonAsText,
14+
json_as_text,
15+
json_data path,
16+
r#"Get any value from a JSON string by its "path", represented as a string"#
17+
);
18+
19+
#[derive(Debug)]
20+
pub(super) struct JsonAsText {
21+
signature: Signature,
22+
aliases: [String; 1],
23+
}
24+
25+
impl Default for JsonAsText {
26+
fn default() -> Self {
27+
Self {
28+
signature: Signature::variadic_any(Volatility::Immutable),
29+
aliases: ["json_as_text".to_string()],
30+
}
31+
}
32+
}
33+
34+
impl ScalarUDFImpl for JsonAsText {
35+
fn as_any(&self) -> &dyn Any {
36+
self
37+
}
38+
39+
fn name(&self) -> &str {
40+
self.aliases[0].as_str()
41+
}
42+
43+
fn signature(&self) -> &Signature {
44+
&self.signature
45+
}
46+
47+
fn return_type(&self, arg_types: &[DataType]) -> DataFusionResult<DataType> {
48+
check_args(arg_types, self.name()).map(|()| DataType::Utf8)
49+
}
50+
51+
fn invoke(&self, args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
52+
invoke::<StringArray, String>(
53+
args,
54+
jiter_json_as_text,
55+
|c| Ok(Arc::new(c) as ArrayRef),
56+
ScalarValue::Utf8,
57+
)
58+
}
59+
60+
fn aliases(&self) -> &[String] {
61+
&self.aliases
62+
}
63+
}
64+
65+
fn jiter_json_as_text(opt_json: Option<&str>, path: &[JsonPath]) -> Result<String, GetError> {
66+
if let Some((mut jiter, peek)) = jiter_json_find(opt_json, path) {
67+
match peek {
68+
Peek::Null => {
69+
jiter.known_null()?;
70+
get_err!()
71+
}
72+
Peek::String => Ok(jiter.known_str()?.to_owned()),
73+
_ => {
74+
let start = jiter.current_index();
75+
jiter.known_skip(peek)?;
76+
let object_slice = jiter.slice_to_current(start);
77+
let object_string = std::str::from_utf8(object_slice)?;
78+
Ok(object_string.to_owned())
79+
}
80+
}
81+
} else {
82+
get_err!()
83+
}
84+
}

src/json_get.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ make_udf_function!(
1616
JsonGet,
1717
json_get,
1818
json_data path,
19-
r#"Get a value from a JSON object by its "path""#
19+
r#"Get a value from a JSON string by its "path""#
2020
);
2121

2222
// build_typed_get!(JsonGet, "json_get", Union, Float64Array, jiter_json_get_float);

src/json_get_bool.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ make_udf_function!(
1414
JsonGetBool,
1515
json_get_bool,
1616
json_data path,
17-
r#"Get an boolean value from a JSON object by its "path""#
17+
r#"Get a boolean value from a JSON string by its "path""#
1818
);
1919

2020
#[derive(Debug)]

src/json_get_float.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ make_udf_function!(
1414
JsonGetFloat,
1515
json_get_float,
1616
json_data path,
17-
r#"Get a float value from a JSON object by its "path""#
17+
r#"Get a float value from a JSON string by its "path""#
1818
);
1919

2020
#[derive(Debug)]

src/json_get_int.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ make_udf_function!(
1414
JsonGetInt,
1515
json_get_int,
1616
json_data path,
17-
r#"Get an integer value from a JSON object by its "path""#
17+
r#"Get an integer value from a JSON string by its "path""#
1818
);
1919

2020
#[derive(Debug)]

src/json_get_json.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ make_udf_function!(
1313
JsonGetJson,
1414
json_get_json,
1515
json_data path,
16-
r#"Get any value from a JSON object by its "path", represented as a string"#
16+
r#"Get a nested raw JSON string from a JSON string by its "path""#
1717
);
1818

1919
#[derive(Debug)]

src/json_get_str.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ make_udf_function!(
1414
JsonGetStr,
1515
json_get_str,
1616
json_data path,
17-
r#"Get a string value from a JSON object by its "path""#
17+
r#"Get a string value from a JSON string by its "path""#
1818
);
1919

2020
#[derive(Debug)]

src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::sync::Arc;
77
mod common;
88
mod common_macros;
99
mod common_union;
10+
mod json_as_text;
1011
mod json_contains;
1112
mod json_get;
1213
mod json_get_bool;
@@ -18,6 +19,7 @@ mod json_length;
1819
mod rewrite;
1920

2021
pub mod functions {
22+
pub use crate::json_as_text::json_as_text;
2123
pub use crate::json_contains::json_contains;
2224
pub use crate::json_get::json_get;
2325
pub use crate::json_get_bool::json_get_bool;
@@ -29,6 +31,7 @@ pub mod functions {
2931
}
3032

3133
pub mod udfs {
34+
pub use crate::json_as_text::json_as_text_udf;
3235
pub use crate::json_contains::json_contains_udf;
3336
pub use crate::json_get::json_get_udf;
3437
pub use crate::json_get_bool::json_get_bool_udf;
@@ -55,6 +58,7 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> {
5558
json_get_float::json_get_float_udf(),
5659
json_get_int::json_get_int_udf(),
5760
json_get_json::json_get_json_udf(),
61+
json_as_text::json_as_text_udf(),
5862
json_get_str::json_get_str_udf(),
5963
json_contains::json_contains_udf(),
6064
json_length::json_length_udf(),

src/rewrite.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ impl ExprPlanner for JsonExprPlanner {
8989
fn plan_binary_op(&self, expr: RawBinaryExpr, _schema: &DFSchema) -> Result<PlannerResult<RawBinaryExpr>> {
9090
let (func, op_display) = match &expr.op {
9191
BinaryOperator::Arrow => (crate::json_get::json_get_udf(), "->"),
92-
BinaryOperator::LongArrow => (crate::json_get_str::json_get_str_udf(), "->>"),
92+
BinaryOperator::LongArrow => (crate::json_as_text::json_as_text_udf(), "->>"),
9393
BinaryOperator::Question => (crate::json_contains::json_contains_udf(), "?"),
9494
_ => return Ok(PlannerResult::Original(expr)),
9595
};

tests/main.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -755,8 +755,8 @@ async fn test_long_arrow() {
755755
"| name | json_data ->> Utf8(\"foo\") |",
756756
"+------------------+---------------------------+",
757757
"| object_foo | abc |",
758-
"| object_foo_array | |",
759-
"| object_foo_obj | |",
758+
"| object_foo_array | [1] |",
759+
"| object_foo_obj | {} |",
760760
"| object_foo_null | |",
761761
"| object_bar | |",
762762
"| list_foo | |",
@@ -771,7 +771,7 @@ async fn test_plan_long_arrow() {
771771
let lines = logical_plan(r#"explain select json_data->>'foo' from test"#).await;
772772

773773
let expected = [
774-
"Projection: json_get_str(test.json_data, Utf8(\"foo\")) AS json_data ->> Utf8(\"foo\")",
774+
"Projection: json_as_text(test.json_data, Utf8(\"foo\")) AS json_data ->> Utf8(\"foo\")",
775775
" TableScan: test projection=[json_data]",
776776
];
777777

@@ -789,8 +789,8 @@ async fn test_long_arrow_eq_str() {
789789
"| name | json_data ->> Utf8(\"foo\") = Utf8(\"abc\") |",
790790
"+------------------+-----------------------------------------+",
791791
"| object_foo | true |",
792-
"| object_foo_array | |",
793-
"| object_foo_obj | |",
792+
"| object_foo_array | false |",
793+
"| object_foo_obj | false |",
794794
"| object_foo_null | |",
795795
"| object_bar | |",
796796
"| list_foo | |",
@@ -933,7 +933,7 @@ async fn test_arrow_nested_double_columns() {
933933
async fn test_lexical_precedence_wrong() {
934934
let sql = r#"select '{"a": "b"}'->>'a'='b' as v"#;
935935
let err = run_query(sql).await.unwrap_err();
936-
assert_eq!(err.to_string(), "Error during planning: Unexpected argument type to 'json_get_str' at position 2, expected string or int, got Boolean.")
936+
assert_eq!(err.to_string(), "Error during planning: Unexpected argument type to 'json_as_text' at position 2, expected string or int, got Boolean.")
937937
}
938938

939939
#[tokio::test]
@@ -1099,3 +1099,20 @@ async fn test_arrow_scalar_union_is_null() {
10991099
];
11001100
assert_batches_eq!(expected, &batches);
11011101
}
1102+
1103+
#[tokio::test]
1104+
async fn test_arrow_cast() {
1105+
let batches = run_query("select (json_data->>'foo')::int from other").await.unwrap();
1106+
1107+
let expected = [
1108+
"+---------------------------+",
1109+
"| json_data ->> Utf8(\"foo\") |",
1110+
"+---------------------------+",
1111+
"| 42 |",
1112+
"| 42 |",
1113+
"| |",
1114+
"| |",
1115+
"+---------------------------+",
1116+
];
1117+
assert_batches_eq!(expected, &batches);
1118+
}

0 commit comments

Comments
 (0)