Skip to content

Commit d19865e

Browse files
buraksenn, Omega359, alamb
authored
[minor] overload from_unixtime func to have optional timezone parameter (#13130)
* overloaded from ts * Update docs/source/user-guide/sql/scalar_functions_new.md Co-authored-by: Bruce Ritchie <[email protected]> * fixed return type * added sql example * optional in doc * review --------- Co-authored-by: Bruce Ritchie <[email protected]> Co-authored-by: Andrew Lamb <[email protected]>
1 parent 85f92ef commit d19865e

File tree

3 files changed

+148
-23
lines changed

3 files changed

+148
-23
lines changed

datafusion/functions/src/datetime/from_unixtime.rs

Lines changed: 111 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,17 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::datatypes::DataType;
19-
use arrow::datatypes::DataType::{Int64, Timestamp};
20-
use arrow::datatypes::TimeUnit::Second;
2118
use std::any::Any;
22-
use std::sync::OnceLock;
19+
use std::sync::{Arc, OnceLock};
2320

24-
use datafusion_common::{exec_err, Result};
21+
use arrow::datatypes::DataType;
22+
use arrow::datatypes::DataType::{Int64, Timestamp, Utf8};
23+
use arrow::datatypes::TimeUnit::Second;
24+
use datafusion_common::{exec_err, internal_err, ExprSchema, Result, ScalarValue};
2525
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
26+
use datafusion_expr::TypeSignature::Exact;
2627
use datafusion_expr::{
27-
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
28+
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
2829
};
2930

3031
#[derive(Debug)]
@@ -41,7 +42,10 @@ impl Default for FromUnixtimeFunc {
4142
impl FromUnixtimeFunc {
4243
pub fn new() -> Self {
4344
Self {
44-
signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
45+
signature: Signature::one_of(
46+
vec![Exact(vec![Int64, Utf8]), Exact(vec![Int64])],
47+
Volatility::Immutable,
48+
),
4549
}
4650
}
4751
}
@@ -59,28 +63,63 @@ impl ScalarUDFImpl for FromUnixtimeFunc {
5963
&self.signature
6064
}
6165

66+
fn return_type_from_exprs(
67+
&self,
68+
args: &[Expr],
69+
_schema: &dyn ExprSchema,
70+
arg_types: &[DataType],
71+
) -> Result<DataType> {
72+
match arg_types.len() {
73+
1 => Ok(Timestamp(Second, None)),
74+
2 => match &args[1] {
75+
Expr::Literal(ScalarValue::Utf8(Some(tz))) => Ok(Timestamp(Second, Some(Arc::from(tz.to_string())))),
76+
_ => exec_err!(
77+
"Second argument for `from_unixtime` must be non-null utf8, received {:?}",
78+
arg_types[1]),
79+
},
80+
_ => exec_err!(
81+
"from_unixtime function requires 1 or 2 arguments, got {}",
82+
arg_types.len()
83+
),
84+
}
85+
}
86+
6287
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
63-
Ok(Timestamp(Second, None))
88+
internal_err!("call return_type_from_exprs instead")
6489
}
6590

6691
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
67-
if args.len() != 1 {
92+
let len = args.len();
93+
if len != 1 && len != 2 {
6894
return exec_err!(
69-
"from_unixtime function requires 1 argument, got {}",
95+
"from_unixtime function requires 1 or 2 argument, got {}",
7096
args.len()
7197
);
7298
}
7399

74-
match args[0].data_type() {
75-
Int64 => args[0].cast_to(&Timestamp(Second, None), None),
76-
other => {
77-
exec_err!(
78-
"Unsupported data type {:?} for function from_unixtime",
79-
other
80-
)
81-
}
100+
if args[0].data_type() != Int64 {
101+
return exec_err!(
102+
"Unsupported data type {:?} for function from_unixtime",
103+
args[0].data_type()
104+
);
105+
}
106+
107+
match len {
108+
1 => args[0].cast_to(&Timestamp(Second, None), None),
109+
2 => match &args[1] {
110+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(tz))) => args[0]
111+
.cast_to(&Timestamp(Second, Some(Arc::from(tz.to_string()))), None),
112+
_ => {
113+
exec_err!(
114+
"Unsupported data type {:?} for function from_unixtime",
115+
args[1].data_type()
116+
)
117+
}
118+
},
119+
_ => unreachable!(),
82120
}
83121
}
122+
84123
fn documentation(&self) -> Option<&Documentation> {
85124
Some(get_from_unixtime_doc())
86125
}
@@ -93,12 +132,63 @@ fn get_from_unixtime_doc() -> &'static Documentation {
93132
Documentation::builder()
94133
.with_doc_section(DOC_SECTION_DATETIME)
95134
.with_description("Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp.")
96-
.with_syntax_example("from_unixtime(expression)")
135+
.with_syntax_example("from_unixtime(expression[, timezone])")
136+
.with_standard_argument("expression", None)
97137
.with_argument(
98-
"expression",
99-
"Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
138+
"timezone",
139+
"Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.",
100140
)
141+
.with_sql_example(r#"```sql
142+
> select from_unixtime(1599572549, 'America/New_York');
143+
+-----------------------------------------------------------+
144+
| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
145+
+-----------------------------------------------------------+
146+
| 2020-09-08T09:42:29-04:00 |
147+
+-----------------------------------------------------------+
148+
```"#)
101149
.build()
102150
.unwrap()
103151
})
104152
}
153+
154+
#[cfg(test)]
155+
mod test {
156+
use crate::datetime::from_unixtime::FromUnixtimeFunc;
157+
use datafusion_common::ScalarValue;
158+
use datafusion_common::ScalarValue::Int64;
159+
use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
160+
161+
#[test]
162+
fn test_without_timezone() {
163+
let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))];
164+
165+
let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
166+
167+
match result {
168+
ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), None)) => {
169+
assert_eq!(sec, 1729900800);
170+
}
171+
_ => panic!("Expected scalar value"),
172+
}
173+
}
174+
175+
#[test]
176+
fn test_with_timezone() {
177+
let args = [
178+
ColumnarValue::Scalar(Int64(Some(1729900800))),
179+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(
180+
"America/New_York".to_string(),
181+
))),
182+
];
183+
184+
let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
185+
186+
match result {
187+
ColumnarValue::Scalar(ScalarValue::TimestampSecond(Some(sec), Some(tz))) => {
188+
assert_eq!(sec, 1729900800);
189+
assert_eq!(tz.to_string(), "America/New_York");
190+
}
191+
_ => panic!("Expected scalar value"),
192+
}
193+
}
194+
}

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,29 @@ SELECT from_unixtime(ts / 1000000000) FROM ts_data LIMIT 3;
308308
2020-09-08T12:42:29
309309
2020-09-08T11:42:29
310310

311+
# from_unixtime with timezone (single literal value)
312+
313+
query P
314+
SELECT from_unixtime(1599572549190855123 / 1000000000, 'America/New_York');
315+
----
316+
2020-09-08T09:42:29-04:00
317+
318+
# from_unixtime with timezone
319+
query P
320+
SELECT from_unixtime(ts / 1000000000, 'Asia/Istanbul') FROM ts_data LIMIT 3;
321+
----
322+
2020-09-08T16:42:29+03:00
323+
2020-09-08T15:42:29+03:00
324+
2020-09-08T14:42:29+03:00
325+
326+
# from_unixtime with utc timezone
327+
query P
328+
SELECT from_unixtime(ts / 1000000000, 'UTC') FROM ts_data LIMIT 3;
329+
----
330+
2020-09-08T13:42:29Z
331+
2020-09-08T12:42:29Z
332+
2020-09-08T11:42:29Z
333+
311334
# to_timestamp
312335

313336
query I

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2037,12 +2037,24 @@ _Alias of [date_trunc](#date_trunc)._
20372037
Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`), and the corresponding timestamp is returned.
20382038

20392039
```
2040-
from_unixtime(expression)
2040+
from_unixtime(expression[, timezone])
20412041
```
20422042

20432043
#### Arguments
20442044

2045-
- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
2045+
- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators.
2046+
- **timezone**: Optional timezone to use when converting the integer to a timestamp. If not provided, the default timezone is UTC.
2047+
2048+
#### Example
2049+
2050+
```sql
2051+
> select from_unixtime(1599572549, 'America/New_York');
2052+
+-----------------------------------------------------------+
2053+
| from_unixtime(Int64(1599572549),Utf8("America/New_York")) |
2054+
+-----------------------------------------------------------+
2055+
| 2020-09-08T09:42:29-04:00 |
2056+
+-----------------------------------------------------------+
2057+
```
20462058

20472059
### `make_date`
20482060

0 commit comments

Comments
 (0)