Better LLM retry behavior #6557

Open · wants to merge 9 commits into main · Changes from 6 commits
7 changes: 4 additions & 3 deletions openhands/core/config/llm_config.py
```diff
@@ -59,10 +59,11 @@ class LLMConfig(BaseModel):
     aws_region_name: str | None = Field(default=None)
     openrouter_site_url: str = Field(default='https://docs.all-hands.dev/')
     openrouter_app_name: str = Field(default='OpenHands')
-    num_retries: int = Field(default=8)
+    # total wait time: 5 + 10 + 20 + 30 = 65 seconds
+    num_retries: int = Field(default=4)
     retry_multiplier: float = Field(default=2)
-    retry_min_wait: int = Field(default=15)
-    retry_max_wait: int = Field(default=120)
+    retry_min_wait: int = Field(default=5)
+    retry_max_wait: int = Field(default=30)
     timeout: int | None = Field(default=None)
     max_message_chars: int = Field(
         default=30_000
```
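To make the new defaults concrete, here is a minimal sketch of the doubling schedule the diff's comment describes: waits start at `retry_min_wait`, grow by `retry_multiplier`, and are capped at `retry_max_wait`. The helper is hypothetical, not OpenHands code.

```python
# A minimal sketch, assuming the schedule the diff's comment describes;
# backoff_schedule is a hypothetical helper, not part of the PR.
def backoff_schedule(
    num_retries: int = 4, min_wait: int = 5, max_wait: int = 30, multiplier: float = 2
) -> list[int]:
    """Wait (in seconds) before each retry under the new defaults."""
    waits, wait = [], min_wait
    for _ in range(num_retries):
        waits.append(min(wait, max_wait))  # cap each wait at retry_max_wait
        wait = int(wait * multiplier)      # double for the next retry
    return waits

print(backoff_schedule())       # [5, 10, 20, 30]
print(sum(backoff_schedule()))  # 65 -- matches the comment's total
```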
13 changes: 1 addition & 12 deletions openhands/llm/llm.py
```diff
@@ -18,11 +18,8 @@
 from litellm import completion as litellm_completion
 from litellm import completion_cost as litellm_completion_cost
 from litellm.exceptions import (
-    APIConnectionError,
     APIError,
-    InternalServerError,
     RateLimitError,
-    ServiceUnavailableError,
 )
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
 from litellm.utils import create_pretrained_tokenizer
```

Review thread on the removed ServiceUnavailableError (enyst marked this conversation as resolved):

Collaborator: 503 is a transitory error, we could probably keep it?

Collaborator (author): Hmm, it's transitory but also unexpected... I'm open to it, but I lean towards telling the user their LLM is flaking out rather than OpenHands looking like it's slow.

Collaborator: I kinda agree with you, actually. We've always had a problem understanding our retry settings, because it's a bit weird to figure out a sensible default for "unexpected stuff happened". And now we do allow the user to continue normally after reporting the error. Eval is the exception; I'd love to hear from Xingyao on that.
```diff
@@ -41,15 +38,7 @@
 __all__ = ['LLM']
 
 # tuple of exceptions to retry on
-LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
-    APIConnectionError,
-    # FIXME: APIError is useful on 502 from a proxy for example,
-    # but it also retries on other errors that are permanent
-    APIError,
-    InternalServerError,
-    RateLimitError,
-    ServiceUnavailableError,
-)
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)
 
 # cache prompt supporting models
 # remove this when we gemini and deepseek are supported
```
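For context, a sketch of how such a tuple is typically consumed by a tenacity-style retry decorator. The wrapper below is illustrative only, assuming the new config defaults; it is not the PR's actual wiring.

```python
# Illustrative wrapper assuming tenacity and the new defaults; not PR code.
from litellm import completion as litellm_completion
from litellm.exceptions import RateLimitError
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (RateLimitError,)

@retry(
    reraise=True,
    # Only RateLimitError is retried; anything else (APIConnectionError,
    # ServiceUnavailableError, ...) now propagates to the caller immediately.
    retry=retry_if_exception_type(LLM_RETRY_EXCEPTIONS),
    stop=stop_after_attempt(4),  # num_retries; the tests treat this as total attempts
    # wait grows exponentially, clamped between retry_min_wait and retry_max_wait
    wait=wait_exponential(multiplier=2, min=5, max=30),
)
def completion_with_retries(**kwargs):
    return litellm_completion(**kwargs)
```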
34 changes: 0 additions & 34 deletions tests/unit/test_llm.py
```diff
@@ -3,10 +3,7 @@
 
 import pytest
 from litellm.exceptions import (
-    APIConnectionError,
-    InternalServerError,
     RateLimitError,
-    ServiceUnavailableError,
 )
 
 from openhands.core.config import LLMConfig
```

```diff
@@ -187,21 +184,6 @@ def test_completion_with_mocked_logger(
 @pytest.mark.parametrize(
     'exception_class,extra_args,expected_retries',
     [
-        (
-            APIConnectionError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
-        (
-            InternalServerError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
-        (
-            ServiceUnavailableError,
-            {'llm_provider': 'test_provider', 'model': 'test_model'},
-            2,
-        ),
         (RateLimitError, {'llm_provider': 'test_provider', 'model': 'test_model'}, 2),
     ],
 )
```

```diff
@@ -254,22 +236,6 @@ def test_completion_rate_limit_wait_time(mock_litellm_completion, default_config
     ), f'Expected wait time between {default_config.retry_min_wait} and {default_config.retry_max_wait} seconds, but got {wait_time}'
 
 
-@patch('openhands.llm.llm.litellm_completion')
-def test_completion_exhausts_retries(mock_litellm_completion, default_config):
-    mock_litellm_completion.side_effect = APIConnectionError(
-        'Persistent error', llm_provider='test_provider', model='test_model'
-    )
-
-    llm = LLM(config=default_config)
-    with pytest.raises(APIConnectionError):
-        llm.completion(
-            messages=[{'role': 'user', 'content': 'Hello!'}],
-            stream=False,
-        )
-
-    assert mock_litellm_completion.call_count == llm.config.num_retries
-
-
 @patch('openhands.llm.llm.litellm_completion')
 def test_completion_operation_cancelled(mock_litellm_completion, default_config):
     mock_litellm_completion.side_effect = OperationCancelled('Operation cancelled')
```
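If retry-exhaustion coverage is still wanted, a hypothetical replacement test (not part of this PR) could mirror the deleted one using the one exception that is still retried:

```python
# Hypothetical test, not in the PR: exercises retry exhaustion via
# RateLimitError, the only member left in LLM_RETRY_EXCEPTIONS.
# Assumes test_llm.py's existing imports and the default_config fixture.
@patch('openhands.llm.llm.litellm_completion')
def test_completion_exhausts_retries_on_rate_limit(mock_litellm_completion, default_config):
    mock_litellm_completion.side_effect = RateLimitError(
        'Persistent rate limit', llm_provider='test_provider', model='test_model'
    )

    llm = LLM(config=default_config)
    with pytest.raises(RateLimitError):
        llm.completion(
            messages=[{'role': 'user', 'content': 'Hello!'}],
            stream=False,
        )

    assert mock_litellm_completion.call_count == llm.config.num_retries
```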
2 changes: 1 addition & 1 deletion tests/unit/test_llm_config.py
```diff
@@ -188,7 +188,7 @@ def test_load_from_toml_llm_missing_generic(
     assert custom_only.model == 'custom-only-model'
     assert custom_only.api_key.get_secret_value() == 'custom-only-api-key'
     assert custom_only.embedding_model == 'local'  # default value
-    assert custom_only.num_retries == 8  # default value
+    assert custom_only.num_retries == 4  # default value
 
 
 def test_load_from_toml_llm_invalid_config(
```