|
33 | 33 | from pyiceberg.catalog import Catalog
|
34 | 34 | from pyiceberg.exceptions import NoSuchTableError
|
35 | 35 | from pyiceberg.io import FileIO
|
36 |
| -from pyiceberg.io.pyarrow import UnsupportedPyArrowTypeException |
| 36 | +from pyiceberg.io.pyarrow import UnsupportedPyArrowTypeException, schema_to_pyarrow |
37 | 37 | from pyiceberg.manifest import DataFile
|
38 | 38 | from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
|
39 | 39 | from pyiceberg.schema import Schema
|
40 | 40 | from pyiceberg.table import Table
|
41 | 41 | from pyiceberg.table.metadata import TableMetadata
|
42 |
| -from pyiceberg.transforms import BucketTransform, IdentityTransform, MonthTransform |
| 42 | +from pyiceberg.transforms import BucketTransform, HourTransform, IdentityTransform, MonthTransform |
43 | 43 | from pyiceberg.types import (
|
44 | 44 | BooleanType,
|
45 | 45 | DateType,
|
46 | 46 | IntegerType,
|
47 | 47 | LongType,
|
48 | 48 | NestedField,
|
49 | 49 | StringType,
|
| 50 | + TimestampType, |
50 | 51 | TimestamptzType,
|
51 | 52 | )
|
52 | 53 |
|
@@ -898,3 +899,30 @@ def test_add_files_that_referenced_by_current_snapshot_with_check_duplicate_file
|
898 | 899 | with pytest.raises(ValueError) as exc_info:
|
899 | 900 | tbl.add_files(file_paths=[existing_files_in_table], check_duplicate_files=True)
|
900 | 901 | assert f"Cannot add files that are already referenced by table, files: {existing_files_in_table}" in str(exc_info.value)
|
@pytest.mark.integration
def test_add_files_hour_transform(session_catalog: Catalog) -> None:
    """Add an existing parquet file to a table partitioned with an ``HourTransform``.

    Regression-style test: partition value inference during ``add_files`` must
    work for hour-granularity timestamp partitions. After adding the file, the
    rows must be visible through a normal table scan.
    """
    identifier = "default.test_add_files_hour_transform"

    schema = Schema(NestedField(1, "hourly", TimestampType()))
    # Field IDs are stripped so the written parquet matches a plain Arrow schema.
    schema_arrow = schema_to_pyarrow(schema, include_field_ids=False)
    spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=HourTransform(), name="spec_hour"))

    tbl = _create_table(session_catalog, identifier, format_version=1, schema=schema, partition_spec=spec)

    file_path = "s3://warehouse/default/test_add_files_hour_transform/test.parquet"

    # Local import mirrors the original test: keeps the datetime helper scoped
    # to the one test that needs it.
    from pyiceberg.utils.datetime import micros_to_timestamp

    # Two microsecond timestamps ~1 hour apart on 2025-04-01 (UTC); exercises
    # non-zero sub-second components in the hour-partition computation.
    arrow_table = pa.Table.from_pylist(
        [{"hourly": micros_to_timestamp(1743465600155254)}, {"hourly": micros_to_timestamp(1743469198047855)}],
        schema=schema_arrow,
    )

    # Write the parquet file directly through the table's FileIO, bypassing
    # pyiceberg's write path, so add_files must derive partitions from the file.
    fo = tbl.io.new_output(file_path)
    with fo.create(overwrite=True) as fos:
        with pq.ParquetWriter(fos, schema=schema_arrow) as writer:
            writer.write_table(arrow_table)

    tbl.add_files(file_paths=[file_path])

    # The added file's rows must be readable back via a table scan; without
    # this the test would pass even if add_files dropped the data.
    assert len(tbl.scan().to_arrow()) == 2
0 commit comments