Skip to content

Commit edd1aad

Browse files
committed
Table commit retries based on table properties
1 parent 7f7bb03 commit edd1aad

File tree

4 files changed

+166
-2
lines changed

4 files changed

+166
-2
lines changed

poetry.lock

Lines changed: 15 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/table/__init__.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from copy import copy
2525
from dataclasses import dataclass
2626
from enum import Enum
27-
from functools import cached_property, singledispatch
27+
from functools import cached_property, partial, singledispatch
2828
from itertools import chain
2929
from typing import (
3030
TYPE_CHECKING,
@@ -43,6 +43,7 @@
4343

4444
from pydantic import Field, SerializeAsAny
4545
from sortedcontainers import SortedList
46+
from tenacity import RetryError, Retrying, retry_if_exception_type, stop_after_attempt, stop_after_delay, wait_exponential
4647
from typing_extensions import Annotated
4748

4849
from pyiceberg.exceptions import CommitFailedException, ResolveError, ValidationError
@@ -791,6 +792,76 @@ class CommitTableResponse(IcebergBaseModel):
791792
metadata_location: str = Field(alias="metadata-location")
792793

793794

795+
class TableCommitRetry:
    """Method decorator that retries a call while it raises ``CommitFailedException``.

    The retry policy is rebuilt on every call from a properties mapping read off
    the instance that owns the decorated method (the attribute name is given by
    ``properties_attribute``). A missing or non-integer value falls back to the
    matching ``*_default`` class attribute.

    NOTE(review): ``loaded_properties`` lives on the decorator, which is shared by
    every instance of the decorated class; concurrent calls from different
    instances overwrite each other's value.
    """

    # Property keys and the values used when a key is absent or not an integer.
    num_retries = "commit.retry.num-retries"
    num_retries_default: int = 4
    min_wait_ms = "commit.retry.min-wait-ms"
    min_wait_ms_default: int = 100
    max_wait_ms = "commit.retry.max-wait-ms"
    max_wait_ms_default: int = 60000  # 1 min
    total_timeout_ms = "commit.retry.total-timeout-ms"
    total_timeout_ms_default: int = 1800000  # 30 mins

    def __init__(self, func: Callable[..., Any], properties_attribute: str = "properties") -> None:
        """Wrap ``func``; ``properties_attribute`` names the mapping on the calling instance."""
        self.properties_attr: str = properties_attribute
        self.func: Callable[..., Any] = func
        self.loaded_properties: "Properties" = {}

    def __get__(self, instance: Any, owner: Any) -> Callable[..., Any]:
        """Bind the retrying call to ``instance``.

        Accessing the attribute on the class itself (``instance is None``) returns
        the decorator unchanged instead of a callable bound to ``None``.
        """
        if instance is None:
            return self
        return partial(self.__call__, instance)

    def __call__(self, instance: Any, *args: Any, **kwargs: Any) -> Any:
        """Run the wrapped function on ``instance`` under the retry controller.

        Returns:
            Whatever the wrapped function returns on its first successful attempt.

        Raises:
            The exception of the last attempt once the controller gives up, and any
            exception that is not a ``CommitFailedException`` immediately.
        """
        self.loaded_properties = getattr(instance, self.properties_attr)
        try:
            for attempt in self.build_retry_controller():
                with attempt:
                    result = self.func(instance, *args, **kwargs)
        except RetryError as err:
            # Surface the failure of the last attempt rather than tenacity's wrapper.
            err.reraise()
        else:
            return result

    @property
    def table_properties(self) -> "Properties":
        """Return the properties captured from the most recent calling instance."""
        return self.loaded_properties

    def build_retry_controller(self) -> "Retrying":
        """Build the retry controller from the currently loaded properties.

        Stops after ``num-retries`` attempts or once ``total-timeout-ms`` has
        elapsed, whichever comes first. Waits exponentially between attempts,
        clamped to the ``min-wait-ms`` .. ``max-wait-ms`` range.
        """
        max_attempts = self.get_config(self.num_retries, self.num_retries_default)
        # tenacity takes durations in seconds; the table properties are milliseconds.
        timeout_s = self.get_config(self.total_timeout_ms, self.total_timeout_ms_default) / 1000.0
        min_wait_s = self.get_config(self.min_wait_ms, self.min_wait_ms_default) / 1000.0
        max_wait_s = self.get_config(self.max_wait_ms, self.max_wait_ms_default) / 1000.0
        return Retrying(
            stop=stop_after_attempt(max_attempts) | stop_after_delay(timeout_s),
            wait=wait_exponential(min=min_wait_s, max=max_wait_s),
            retry=retry_if_exception_type(CommitFailedException),
        )

    def get_config(self, config: str, default: int) -> int:
        """Look up ``config`` in the loaded properties as an int, or return ``default``."""
        return self.to_int(self.table_properties.get(config, ""), default)

    @staticmethod
    def to_int(v: str, default: int) -> int:
        """Convert ``v`` to int; return ``default`` when it cannot be converted."""
        try:
            return int(v)
        except (ValueError, TypeError):
            return default
858+
859+
860+
def table_commit_retry(properties_attribute: str) -> Callable[..., TableCommitRetry]:
    """Return a decorator that wraps a method in ``TableCommitRetry``.

    ``properties_attribute`` is the name of the attribute, on the instance owning
    the decorated method, from which the retry settings are read.
    """
    decorator = partial(TableCommitRetry, properties_attribute=properties_attribute)
    return decorator
863+
864+
794865
class Table:
795866
identifier: Identifier = Field()
796867
metadata: TableMetadata
@@ -994,6 +1065,7 @@ def refs(self) -> Dict[str, SnapshotRef]:
9941065
"""Return the snapshot references in the table."""
9951066
return self.metadata.refs
9961067

1068+
@table_commit_retry("properties")
9971069
def _do_commit(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequirement, ...]) -> None:
9981070
response = self.catalog._commit_table( # pylint: disable=W0212
9991071
CommitTableRequest(

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ adlfs = { version = ">=2023.1.0,<2024.2.0", optional = true }
7070
gcsfs = { version = ">=2023.1.0,<2024.1.0", optional = true }
7171
psycopg2-binary = { version = ">=2.9.6", optional = true }
7272
sqlalchemy = { version = "^2.0.18", optional = true }
73+
tenacity = "8.2.3"
7374

7475
[tool.poetry.dev-dependencies]
7576
pytest = "7.4.4"
@@ -295,6 +296,10 @@ ignore_missing_imports = true
295296
module = "setuptools.*"
296297
ignore_missing_imports = true
297298

299+
[[tool.mypy.overrides]]
300+
module = "tenacity.*"
301+
ignore_missing_imports = true
302+
298303
[tool.coverage.run]
299304
source = ['pyiceberg/']
300305

tests/table/test_init.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
_generate_snapshot_id,
6262
_match_deletes_to_data_file,
6363
_TableMetadataUpdateContext,
64+
table_commit_retry,
6465
update_table_metadata,
6566
)
6667
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataUtil, TableMetadataV2
@@ -77,6 +78,7 @@
7778
SortOrder,
7879
)
7980
from pyiceberg.transforms import BucketTransform, IdentityTransform
81+
from pyiceberg.typedef import Properties
8082
from pyiceberg.types import (
8183
BinaryType,
8284
BooleanType,
@@ -982,3 +984,74 @@ def test_correct_schema() -> None:
982984
_ = t.scan(snapshot_id=-1).projection()
983985

984986
assert "Snapshot not found: -1" in str(exc_info.value)
987+
988+
989+
def test_non_commit_failure_retry() -> None:
    """An exception other than CommitFailedException surfaces after a single call."""

    class CustomException(Exception):
        pass

    class CustomErrorCommitter:
        def __init__(self) -> None:
            self.count: int = 0
            self.properties: Properties = {
                "commit.retry.num-retries": "3",
                "commit.retry.max-wait-ms": "0",
                "commit.retry.min-wait-ms": "0",
            }

        @table_commit_retry("properties")
        def my_function(self) -> None:
            # Count every invocation so the test can tell whether a retry happened.
            self.count += 1
            raise CustomException

    committer = CustomErrorCommitter()

    with pytest.raises(CustomException):
        committer.my_function()

    assert committer.count == 1
1014+
1015+
1016+
def test_custom_retry_commit_config() -> None:
    """With num-retries set to "3", a method that always fails is called three times."""

    class AlwaysFailingCommitter:
        def __init__(self) -> None:
            self.count: int = 0
            self.properties: Properties = {
                "commit.retry.num-retries": "3",
                "commit.retry.max-wait-ms": "0",
                "commit.retry.min-wait-ms": "0",
            }

        @table_commit_retry("properties")
        def my_function(self) -> None:
            # Count every invocation so the number of attempts can be asserted.
            self.count += 1
            raise CommitFailedException

    committer = AlwaysFailingCommitter()

    with pytest.raises(CommitFailedException):
        committer.my_function()

    assert committer.count == 3
1036+
1037+
1038+
def test_invalid_commit_retry_config() -> None:
    """A non-integer num-retries value falls back to the default of four calls."""

    class InvalidConfigCommitter:
        def __init__(self) -> None:
            self.count: int = 0
            self.properties: Properties = {
                "commit.retry.num-retries": "I AM INVALID",
                "commit.retry.max-wait-ms": "0",
                "commit.retry.min-wait-ms": "0",
            }

        @table_commit_retry("properties")
        def my_function(self) -> None:
            # Count every invocation so the number of attempts can be asserted.
            self.count += 1
            raise CommitFailedException

    committer = InvalidConfigCommitter()

    with pytest.raises(CommitFailedException):
        committer.my_function()

    assert committer.count == 4

0 commit comments

Comments
 (0)