Skip to content

Commit

Permalink
fix: properly encode metadata field values when creating actions
Browse files Browse the repository at this point in the history
Fixes #3174

Signed-off-by: R. Tyler Croy <[email protected]>
  • Loading branch information
rtyler committed Feb 3, 2025
1 parent 5113aea commit 9dc8f32
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
10 changes: 5 additions & 5 deletions python/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,18 +437,18 @@ impl Field {
inner = inner.with_metadata(metadata.iter().map(|(k, v)| {
(
k,
if let serde_json::Value::Number(n) = v {
n.as_i64().map_or_else(
match v {
serde_json::Value::Number(n) => n.as_i64().map_or_else(
|| MetadataValue::String(v.to_string()),
|i| {
i32::try_from(i)
.ok()
.map(MetadataValue::Number)
.unwrap_or_else(|| MetadataValue::String(v.to_string()))
},
)
} else {
MetadataValue::String(v.to_string())
),
serde_json::Value::String(s) => MetadataValue::String(s.to_string()),
other => MetadataValue::String(other.to_string()),
},
)
}));
Expand Down
8 changes: 4 additions & 4 deletions python/tests/test_generated_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def gc_schema() -> Schema:
Field(
name="gc",
type=PrimitiveType("integer"),
metadata={"delta.generationExpression": "'5'"},
metadata={"delta.generationExpression": "5"},
),
]
)
Expand Down Expand Up @@ -118,7 +118,7 @@ def test_write_with_invalid_gc_to_table(table_with_gc, invalid_gc_data):
with pytest.raises(
DeltaError,
match=re.escape(
"Invariant violations: [\"Check or Invariant (gc = '5' OR (gc IS NULL AND '5' IS NULL)) violated by value in row: [10]\"]"
'Invariant violations: ["Check or Invariant (gc = 5 OR (gc IS NULL AND 5 IS NULL)) violated by value in row: [10]"]'
),
):
write_deltalake(table_with_gc, mode="append", data=invalid_gc_data)
Expand Down Expand Up @@ -177,7 +177,7 @@ def test_raise_when_gc_passed_during_adding_new_columns(tmp_path, data_without_g
Field(
name="gc",
type=PrimitiveType("integer"),
metadata={"delta.generationExpression": "'5'"},
metadata={"delta.generationExpression": "5"},
)
]
)
Expand Down Expand Up @@ -257,7 +257,7 @@ def test_merge_with_gc_invalid(table_with_gc: DeltaTable, invalid_gc_data):
with pytest.raises(
DeltaError,
match=re.escape(
"Invariant violations: [\"Check or Invariant (gc = '5' OR (gc IS NULL AND '5' IS NULL)) violated by value in row: [10]\"]"
'Invariant violations: ["Check or Invariant (gc = 5 OR (gc IS NULL AND 5 IS NULL)) violated by value in row: [10]"]'
),
):
(
Expand Down
9 changes: 9 additions & 0 deletions python/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,3 +553,12 @@ def test_schema_conversions(schema, expected_schema, conversion_mode):
result_schema = _convert_pa_schema_to_delta(schema, conversion_mode)

assert result_schema == expected_schema


# <https://github.com/delta-io/delta-rs/issues/3174>
def test_field_serialization():
    """Regression check for delta-rs issue #3174.

    Builds a ``Field`` with a single string-valued metadata entry and
    verifies that both the field name and the metadata mapping read back
    exactly as they were supplied.
    """
    from deltalake import Field

    field = Field(
        "fieldname",
        "binary",
        metadata={"key": "value"},
    )

    # The name must be preserved as given.
    assert field.name == "fieldname"
    # The string metadata value must come back unchanged.
    assert field.metadata == {"key": "value"}

0 comments on commit 9dc8f32

Please sign in to comment.