Skip to content

Commit b38c731

Browse files
fix: write hive partitions for any int/uint/float (#15337)
Co-authored-by: christophermcdermott <masked>
1 parent 49737d8 commit b38c731

File tree

2 files changed

+81
-13
lines changed

2 files changed

+81
-13
lines changed

datafusion/datasource/src/write/demux.rs

Lines changed: 58 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,10 @@ use arrow::array::{
3333
};
3434
use arrow::datatypes::{DataType, Schema};
3535
use datafusion_common::cast::{
36-
as_boolean_array, as_date32_array, as_date64_array, as_int32_array, as_int64_array,
37-
as_string_array, as_string_view_array,
36+
as_boolean_array, as_date32_array, as_date64_array, as_float16_array,
37+
as_float32_array, as_float64_array, as_int16_array, as_int32_array, as_int64_array,
38+
as_int8_array, as_string_array, as_string_view_array, as_uint16_array,
39+
as_uint32_array, as_uint64_array, as_uint8_array,
3840
};
3941
use datafusion_common::{exec_datafusion_err, not_impl_err, DataFusionError};
4042
use datafusion_common_runtime::SpawnedTask;
@@ -407,6 +409,18 @@ fn compute_partition_keys_by_row<'a>(
407409
partition_values.push(Cow::from(date));
408410
}
409411
}
412+
DataType::Int8 => {
413+
let array = as_int8_array(col_array)?;
414+
for i in 0..rb.num_rows() {
415+
partition_values.push(Cow::from(array.value(i).to_string()));
416+
}
417+
}
418+
DataType::Int16 => {
419+
let array = as_int16_array(col_array)?;
420+
for i in 0..rb.num_rows() {
421+
partition_values.push(Cow::from(array.value(i).to_string()));
422+
}
423+
}
410424
DataType::Int32 => {
411425
let array = as_int32_array(col_array)?;
412426
for i in 0..rb.num_rows() {
@@ -419,6 +433,48 @@ fn compute_partition_keys_by_row<'a>(
419433
partition_values.push(Cow::from(array.value(i).to_string()));
420434
}
421435
}
436+
DataType::UInt8 => {
437+
let array = as_uint8_array(col_array)?;
438+
for i in 0..rb.num_rows() {
439+
partition_values.push(Cow::from(array.value(i).to_string()));
440+
}
441+
}
442+
DataType::UInt16 => {
443+
let array = as_uint16_array(col_array)?;
444+
for i in 0..rb.num_rows() {
445+
partition_values.push(Cow::from(array.value(i).to_string()));
446+
}
447+
}
448+
DataType::UInt32 => {
449+
let array = as_uint32_array(col_array)?;
450+
for i in 0..rb.num_rows() {
451+
partition_values.push(Cow::from(array.value(i).to_string()));
452+
}
453+
}
454+
DataType::UInt64 => {
455+
let array = as_uint64_array(col_array)?;
456+
for i in 0..rb.num_rows() {
457+
partition_values.push(Cow::from(array.value(i).to_string()));
458+
}
459+
}
460+
DataType::Float16 => {
461+
let array = as_float16_array(col_array)?;
462+
for i in 0..rb.num_rows() {
463+
partition_values.push(Cow::from(array.value(i).to_string()));
464+
}
465+
}
466+
DataType::Float32 => {
467+
let array = as_float32_array(col_array)?;
468+
for i in 0..rb.num_rows() {
469+
partition_values.push(Cow::from(array.value(i).to_string()));
470+
}
471+
}
472+
DataType::Float64 => {
473+
let array = as_float64_array(col_array)?;
474+
for i in 0..rb.num_rows() {
475+
partition_values.push(Cow::from(array.value(i).to_string()));
476+
}
477+
}
422478
DataType::Dictionary(_, _) => {
423479
downcast_dictionary_array!(
424480
col_array => {

datafusion/sqllogictest/test_files/copy.slt

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -110,24 +110,36 @@ a
110110

111111
# Copy to directory as partitioned files
112112
query I
113-
COPY (values (1::int, 2::bigint, 19968::date, arrow_cast(1725235200000, 'Date64'), false, 'x'),
114-
(11::int, 22::bigint, 19969::date, arrow_cast(1725148800000, 'Date64'), true, 'y')
113+
COPY (values (arrow_cast(1, 'Int8'), arrow_cast(2, 'UInt8'), arrow_cast(3, 'Int16'), arrow_cast(4, 'UInt16'),
114+
arrow_cast(5, 'Int32'), arrow_cast(6, 'UInt32'), arrow_cast(7, 'Int64'), arrow_cast(8, 'UInt64'),
115+
arrow_cast(9.1015625, 'Float16'), arrow_cast(10.1, 'Float32'), arrow_cast(11.1, 'Float64'), 19968::date,
116+
arrow_cast(1725235200000, 'Date64'), false, 'x'),
117+
(arrow_cast(11, 'Int8'), arrow_cast(22, 'UInt8'), arrow_cast(33, 'Int16'), arrow_cast(44, 'UInt16'),
118+
arrow_cast(55, 'Int32'), arrow_cast(66, 'UInt32'), arrow_cast(77, 'Int64'), arrow_cast(88, 'UInt64'),
119+
arrow_cast(9.203125, 'Float16'), arrow_cast(10.2, 'Float32'), arrow_cast(11.2, 'Float64'), 19969::date,
120+
arrow_cast(1725148800000, 'Date64'), true, 'y')
115121
)
116-
TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4, column5)
122+
TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4,
123+
column5, column6, column7, column8, column9, column10, column11, column12, column13, column14)
117124
OPTIONS ('format.compression' 'zstd(10)');
118125
----
119126
2
120127

121128
# validate partitioning
122129
statement ok
123-
CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 bigint, column3 date, column4 date, column5 boolean, column6 varchar) STORED AS PARQUET
124-
LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5);
125-
126-
query IIDDBT
127-
select column1, column2, column3, column4, column5, column6 from validate_partitioned_parquet5 order by column1,column2,column3,column4,column5;
128-
----
129-
1 2 2024-09-02 2024-09-02 false x
130-
11 22 2024-09-03 2024-09-01 true y
130+
CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 int, column3 int, column4 int, column5 int,
131+
column6 int, column7 bigint, column8 bigint, column9 float, column10 float, column11 float, column12 date,
132+
column13 date, column14 boolean, column15 varchar) STORED AS PARQUET
133+
LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5,
134+
column6, column7, column8, column9, column10, column11, column12, column13, column14);
135+
136+
query IIIIIIIIRRRDDBT
137+
select column1, column2, column3, column4, column5, column6, column7, column8, column9, column10, column11, column12,
138+
column13, column14, column15 from validate_partitioned_parquet5 order by column1, column2, column3, column4,
139+
column5, column6, column7, column8, column9, column10, column11, column12, column13;
140+
----
141+
1 2 3 4 5 6 7 8 9.1015625 10.1 11.1 2024-09-02 2024-09-02 false x
142+
11 22 33 44 55 66 77 88 9.203125 10.2 11.2 2024-09-03 2024-09-01 true y
131143

132144

133145
statement ok

0 commit comments

Comments
 (0)