Skip to content

Adding promotion for UnknownType per V3+ spec #2155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,11 @@ def promote(file_type: IcebergType, read_type: IcebergType) -> IcebergType:
raise ResolveError(f"Cannot promote {file_type} to {read_type}")


@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
return read_type # Per V3 Spec, "Unknown" can be promoted to any type


@promote.register(IntegerType)
def _(file_type: IntegerType, read_type: IcebergType) -> IcebergType:
if isinstance(read_type, LongType):
Expand Down
23 changes: 23 additions & 0 deletions tests/io/test_pyarrow_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pyiceberg.expressions.literals import literal
from pyiceberg.io.pyarrow import (
UnsupportedPyArrowTypeException,
_check_pyarrow_schema_compatible,
_ConvertToArrowSchema,
_ConvertToIceberg,
_ConvertToIcebergWithoutIDs,
Expand Down Expand Up @@ -313,6 +314,28 @@ def test_pyarrow_dictionary_encoded_type_to_iceberg(value_type: pa.DataType, exp
assert visit_pyarrow(pyarrow_dict, _ConvertToIceberg()) == expected_result


def test_schema_check_null_column(table_schema_simple: Schema) -> None:
pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
new_field = pyarrow_schema.field(0).with_type(pa.null()) # Make the optional string field null for testing
pyarrow_schema = pyarrow_schema.set(0, new_field)
assert pyarrow_schema.field(0).type == pa.null()
_check_pyarrow_schema_compatible(table_schema_simple, pyarrow_schema)


def test_schema_conversion_null_column(table_schema_simple: Schema) -> None:
pyarrow_schema: pa.Schema = schema_to_pyarrow(table_schema_simple)
new_field = pyarrow_schema.field(2).with_type(pa.null()) # Make the optional boolean field null for testing
pyarrow_schema = pyarrow_schema.set(2, new_field)
assert pyarrow_schema.field(2).type == pa.null()
actual = str(pyarrow_to_schema(pyarrow_schema))
expected = """table {
1: foo: optional string
2: bar: required int
3: baz: optional unknown
}"""
assert actual == expected


def test_round_schema_conversion_simple(table_schema_simple: Schema) -> None:
actual = str(pyarrow_to_schema(schema_to_pyarrow(table_schema_simple)))
expected = """table {
Expand Down
4 changes: 4 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
TimestampType,
TimestamptzType,
TimeType,
UnknownType,
UUIDType,
)

Expand All @@ -74,6 +75,7 @@
FixedType(16),
FixedType(20),
UUIDType(),
UnknownType(),
]


Expand Down Expand Up @@ -855,6 +857,8 @@ def should_promote(file_type: IcebergType, read_type: IcebergType) -> bool:
return file_type.precision <= read_type.precision and file_type.scale == file_type.scale
if isinstance(file_type, FixedType) and isinstance(read_type, UUIDType) and len(file_type) == 16:
return True
if isinstance(file_type, UnknownType):
return True
return False


Expand Down