Skip to content

Commit 513376c

Browse files
authored
feat: support arrow dictionary in schema conversion (#1293)
## Which issue does this PR close?

- Closes #1277

## What changes are included in this PR?

This PR extends the current Arrow-to-Iceberg schema conversion to support Arrow dictionary types. It uses the approach suggested by @alamb (in a comment on #1277) for the schema conversion work.

## Are these changes tested?

The existing test for schema conversion has been expanded. I've also taken the liberty of changing the `assert_eq!` call to use the `pretty_assertions` version, as it makes failures much easier to read with a large JSON structure. I can revert this if it is problematic.
1 parent b4bc6dd commit 513376c

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

crates/iceberg/src/arrow/schema.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ fn visit_type<V: ArrowSchemaVisitor>(r#type: &DataType, visitor: &mut V) -> Resu
167167
)),
168168
},
169169
DataType::Struct(fields) => visit_struct(fields, visitor),
170+
DataType::Dictionary(_key_type, value_type) => visit_type(value_type, visitor),
170171
other => Err(Error::new(
171172
ErrorKind::DataInvalid,
172173
format!("Cannot visit Arrow data type: {other}"),
@@ -1029,6 +1030,7 @@ mod tests {
10291030
Arc::new(simple_field(DEFAULT_MAP_FIELD_NAME, r#struct, false, "17")),
10301031
false,
10311032
);
1033+
let dictionary = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
10321034

10331035
let fields = Fields::from(vec![
10341036
simple_field("aa", DataType::Int32, false, "18"),
@@ -1108,6 +1110,7 @@ mod tests {
11081110
),
11091111
simple_field("map", map, false, "16"),
11101112
simple_field("struct", r#struct, false, "17"),
1113+
simple_field("dictionary", dictionary, false, "30"),
11111114
])
11121115
}
11131116

@@ -1285,6 +1288,12 @@ mod tests {
12851288
}
12861289
]
12871290
}
1291+
},
1292+
{
1293+
"id":30,
1294+
"name":"dictionary",
1295+
"required":true,
1296+
"type":"string"
12881297
}
12891298
],
12901299
"identifier-field-ids":[]
@@ -1299,7 +1308,7 @@ mod tests {
12991308
let arrow_schema = arrow_schema_for_arrow_schema_to_schema_test();
13001309
let schema = iceberg_schema_for_arrow_schema_to_schema_test();
13011310
let converted_schema = arrow_schema_to_schema(&arrow_schema).unwrap();
1302-
assert_eq!(converted_schema, schema);
1311+
pretty_assertions::assert_eq!(converted_schema, schema);
13031312
}
13041313

13051314
fn arrow_schema_for_schema_to_arrow_schema_test() -> ArrowSchema {

0 commit comments

Comments
 (0)