Skip to content

Commit 8cdf4ab

Browse files
authored
🐛 Write fields instead of spec object (#846)
1 parent e581b40 commit 8cdf4ab

File tree

2 files changed: +23 / -27 lines changed

2 files changed

+23
-27
lines changed

pyiceberg/manifest.py

Lines changed: 21 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,15 @@
3131
Type,
3232
)
3333

34+
from pydantic_core import to_json
35+
3436
from pyiceberg.avro.file import AvroFile, AvroOutputFile
3537
from pyiceberg.conversions import to_bytes
3638
from pyiceberg.exceptions import ValidationError
3739
from pyiceberg.io import FileIO, InputFile, OutputFile
3840
from pyiceberg.partitioning import PartitionSpec
3941
from pyiceberg.schema import Schema
40-
from pyiceberg.typedef import EMPTY_DICT, Record, TableVersion
42+
from pyiceberg.typedef import Record, TableVersion
4143
from pyiceberg.types import (
4244
BinaryType,
4345
BooleanType,
@@ -645,7 +647,6 @@ class ManifestWriter(ABC):
645647
_output_file: OutputFile
646648
_writer: AvroOutputFile[ManifestEntry]
647649
_snapshot_id: int
648-
_meta: Dict[str, str]
649650
_added_files: int
650651
_added_rows: int
651652
_existing_files: int
@@ -655,15 +656,12 @@ class ManifestWriter(ABC):
655656
_min_data_sequence_number: Optional[int]
656657
_partitions: List[Record]
657658

658-
def __init__(
659-
self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int, meta: Dict[str, str] = EMPTY_DICT
660-
) -> None:
659+
def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int) -> None:
661660
self.closed = False
662661
self._spec = spec
663662
self._schema = schema
664663
self._output_file = output_file
665664
self._snapshot_id = snapshot_id
666-
self._meta = meta
667665

668666
self._added_files = 0
669667
self._added_rows = 0
@@ -697,6 +695,15 @@ def content(self) -> ManifestContent: ...
697695
@abstractmethod
698696
def version(self) -> TableVersion: ...
699697

698+
@property
699+
def _meta(self) -> Dict[str, str]:
700+
return {
701+
"schema": self._schema.model_dump_json(),
702+
"partition-spec": to_json(self._spec.fields).decode("utf-8"),
703+
"partition-spec-id": str(self._spec.spec_id),
704+
"format-version": str(self.version),
705+
}
706+
700707
def _with_partition(self, format_version: TableVersion) -> Schema:
701708
data_file_type = data_file_with_partition(
702709
format_version=format_version, partition_type=self._spec.partition_type(self._schema)
@@ -771,12 +778,6 @@ def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile,
771778
schema,
772779
output_file,
773780
snapshot_id,
774-
{
775-
"schema": schema.model_dump_json(),
776-
"partition-spec": spec.model_dump_json(),
777-
"partition-spec-id": str(spec.spec_id),
778-
"format-version": "1",
779-
},
780781
)
781782

782783
def content(self) -> ManifestContent:
@@ -792,19 +793,7 @@ def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
792793

793794
class ManifestWriterV2(ManifestWriter):
794795
def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile, snapshot_id: int):
795-
super().__init__(
796-
spec,
797-
schema,
798-
output_file,
799-
snapshot_id,
800-
meta={
801-
"schema": schema.model_dump_json(),
802-
"partition-spec": spec.model_dump_json(),
803-
"partition-spec-id": str(spec.spec_id),
804-
"format-version": "2",
805-
"content": "data",
806-
},
807-
)
796+
super().__init__(spec, schema, output_file, snapshot_id)
808797

809798
def content(self) -> ManifestContent:
810799
return ManifestContent.DATA
@@ -813,6 +802,13 @@ def content(self) -> ManifestContent:
813802
def version(self) -> TableVersion:
814803
return 2
815804

805+
@property
806+
def _meta(self) -> Dict[str, str]:
807+
return {
808+
**super()._meta,
809+
"content": "data",
810+
}
811+
816812
def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
817813
if entry.data_sequence_number is None:
818814
if entry.snapshot_id is not None and entry.snapshot_id != self._snapshot_id:

tests/utils/test_manifest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -348,8 +348,8 @@ def test_write_manifest(
348348

349349
expected_metadata = {
350350
"schema": test_schema.model_dump_json(),
351-
"partition-spec": test_spec.model_dump_json(),
352-
"partition-spec-id": str(test_spec.spec_id),
351+
"partition-spec": """[{"source-id":1,"field-id":1,"transform":"identity","name":"VendorID"},{"source-id":2,"field-id":2,"transform":"identity","name":"tpep_pickup_datetime"}]""",
352+
"partition-spec-id": str(demo_manifest_file.partition_spec_id),
353353
"format-version": str(format_version),
354354
}
355355
_verify_metadata_with_fastavro(

0 commit comments

Comments
 (0)