From 4615548477b12eed183e85556c3622b22b31a12e Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Mon, 10 Feb 2025 16:54:23 -0500 Subject: [PATCH 1/3] Bump OpenHands ACI to 0.2.1 (#6678) --- poetry.lock | 12 +++++------- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index 75cefb01c7c7..cf0b20e6f7e3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1370,7 +1370,6 @@ files = [ {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"}, {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"}, {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"}, - {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"}, {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"}, {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"}, {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"}, @@ -1381,7 +1380,6 @@ files = [ {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"}, {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"}, {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"}, - {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"}, {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"}, {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"}, {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"}, @@ -5864,14 +5862,14 @@ realtime = ["websockets (>=13,<15)"] [[package]] name = "openhands-aci" -version = "0.2.0" +version = "0.2.1" description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands." optional = false python-versions = "<4.0,>=3.12" groups = ["main"] files = [ - {file = "openhands_aci-0.2.0-py3-none-any.whl", hash = "sha256:5ca0df7ab6dab1034e70d3982b401db9888dd6deb8149d30e47193bf8588ed65"}, - {file = "openhands_aci-0.2.0.tar.gz", hash = "sha256:6c54defd07a7b2e861ff5c8f683777c2c2503a0f417eeb570382be682e7038d6"}, + {file = "openhands_aci-0.2.1-py3-none-any.whl", hash = "sha256:10f5038e6303b8e1d40af0b61fb967f1d9d00c1ef05c2a06db2529364f3fef04"}, + {file = "openhands_aci-0.2.1.tar.gz", hash = "sha256:7dc72ba3aa7e9d699aacf8b85909ecaa6b87e7e4f203e4d549f740ac56d5ca2b"}, ] [package.dependencies] @@ -10638,4 +10636,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "74eaaaed3b4f3e617760db7a38bea4fa6b9f66e169c5f4e9053c9a197fe616ca" +content-hash = "439164c45c674574af3fc15ade8e69452c11d4d45d95c2e671c752cfed6a3143" diff --git a/pyproject.toml b/pyproject.toml index 81f5b71b6645..70e5836cc4a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ runloop-api-client = "0.22.0" libtmux = ">=0.37,<0.40" pygithub = "^2.5.0" joblib = "*" -openhands-aci = "^0.2.0" +openhands-aci = "^0.2.1" python-socketio = "^5.11.4" redis = "^5.2.0" sse-starlette = "^2.1.3" From 1a715d2ec4c59a48dd070d02a59ef2869f0f92dd Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Tue, 11 Feb 2025 00:00:46 +0100 Subject: [PATCH 2/3] Clean up global in llm.py (we figured it's not needed) (#6675) --- .../agenthub/codeact_agent/codeact_agent.py | 1 - openhands/llm/llm.py | 19 ------------------- 2 files changed, 20 deletions(-) diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 62ad243e915b..5a1f6d54a84d 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -303,7 +303,6 @@ def get_observation_message( and len(obs.set_of_marks) > 0 and self.config.enable_som_visual_browsing and self.llm.vision_is_active() - and self.llm.is_visual_browser_tool_supported() ): text += 'Image: Current webpage screenshot (Note that only visible portion of webpage is present in the screenshot. You may need to scroll to view the remaining portion of the web-page.)\n' message = Message( diff --git a/openhands/llm/llm.py b/openhands/llm/llm.py index ff3c62772b47..e8dd2f6f1ef8 100644 --- a/openhands/llm/llm.py +++ b/openhands/llm/llm.py @@ -75,16 +75,6 @@ 'o3-mini', ] -# visual browsing tool supported models -# This flag is needed since gpt-4o and gpt-4o-mini do not allow passing image_urls with role='tool' -VISUAL_BROWSING_TOOL_SUPPORTED_MODELS = [ - 'claude-3-5-sonnet', - 'claude-3-5-sonnet-20240620', - 'claude-3-5-sonnet-20241022', - 'o1-2024-12-17', -] - - REASONING_EFFORT_SUPPORTED_MODELS = [ 'o1-2024-12-17', 'o1', @@ -495,15 +485,6 @@ def is_function_calling_active(self) -> bool: """ return self._function_calling_active - def is_visual_browser_tool_supported(self) -> bool: - return ( - self.config.model in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS - or self.config.model.split('/')[-1] in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS - or any( - m in self.config.model for m in VISUAL_BROWSING_TOOL_SUPPORTED_MODELS - ) - ) - def _post_completion(self, response: ModelResponse) -> float: """Post-process the completion response. From 6a6dc93e0379bfdf96096fce06b21a8e51871aec Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Mon, 10 Feb 2025 22:21:11 -0500 Subject: [PATCH 3/3] feat(runtime): use `prlimit` to limit resource usage of command to avoid OOM Runtime Kill (#6338) Co-authored-by: openhands Co-authored-by: Engel Nyst Co-authored-by: Graham Neubig --- openhands/runtime/action_execution_server.py | 19 +++ .../runtime/impl/remote/remote_runtime.py | 10 +- openhands/runtime/utils/bash.py | 16 ++- tests/runtime/test_runtime_resource.py | 113 ++++++++++++++++++ tests/runtime/test_stress_docker_runtime.py | 36 ------ 5 files changed, 151 insertions(+), 43 deletions(-) create mode 100644 tests/runtime/test_runtime_resource.py delete mode 100644 tests/runtime/test_stress_docker_runtime.py diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py index 2148ab2267d1..f2d12196fede 100644 --- a/openhands/runtime/action_execution_server.py +++ b/openhands/runtime/action_execution_server.py @@ -21,6 +21,7 @@ from pathlib import Path from zipfile import ZipFile +import psutil from fastapi import Depends, FastAPI, HTTPException, Request, UploadFile from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse, StreamingResponse @@ -108,6 +109,22 @@ def __init__( self.last_execution_time = self.start_time self._initialized = False + if _override_max_memory_gb := os.environ.get('RUNTIME_MAX_MEMORY_GB', None): + self.max_memory_gb = int(_override_max_memory_gb) + logger.info( + f'Setting max memory to {self.max_memory_gb}GB (according to the RUNTIME_MAX_MEMORY_GB environment variable)' + ) + else: + # Get available system memory + total_memory_gb = psutil.virtual_memory().total / ( + 1024 * 1024 * 1024 + ) # Convert to GB + self.max_memory_gb = int(max(0.5, total_memory_gb - 1.0)) + # Reserve 1GB as head room, minimum of 0.5GB + logger.info( + f'Total memory: {total_memory_gb}GB, setting limit to {self.max_memory_gb}GB (reserved 1GB for action execution server, minimum 0.5GB)' + ) + @property def initial_cwd(self): return self._initial_cwd @@ -120,8 +137,10 @@ async def ainit(self): no_change_timeout_seconds=int( os.environ.get('NO_CHANGE_TIMEOUT_SECONDS', 30) ), + max_memory_mb=self.max_memory_gb * 1024, ) self.bash_session.initialize() + await wait_all( (self._init_plugin(plugin) for plugin in self.plugins_to_load), timeout=30, diff --git a/openhands/runtime/impl/remote/remote_runtime.py b/openhands/runtime/impl/remote/remote_runtime.py index cb10b2c15b78..f33acedbf4ad 100644 --- a/openhands/runtime/impl/remote/remote_runtime.py +++ b/openhands/runtime/impl/remote/remote_runtime.py @@ -212,13 +212,17 @@ def _start_runtime(self): plugins=self.plugins, app_config=self.config, ) + environment = { + 'DEBUG': 'true' + if self.config.debug or os.environ.get('DEBUG', 'false').lower() == 'true' + else {}, + } + environment.update(self.config.sandbox.runtime_startup_env_vars) start_request = { 'image': self.container_image, 'command': command, 'working_dir': '/openhands/code/', - 'environment': {'DEBUG': 'true'} - if self.config.debug or os.environ.get('DEBUG', 'false').lower() == 'true' - else {}, + 'environment': environment, 'session_id': self.sid, 'resource_factor': self.config.sandbox.remote_runtime_resource_factor, } diff --git a/openhands/runtime/utils/bash.py b/openhands/runtime/utils/bash.py index 5fda883d4d01..419573d7546d 100644 --- a/openhands/runtime/utils/bash.py +++ b/openhands/runtime/utils/bash.py @@ -175,25 +175,32 @@ def __init__( work_dir: str, username: str | None = None, no_change_timeout_seconds: int = 30, + max_memory_mb: int | None = None, ): self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds self.work_dir = work_dir self.username = username self._initialized = False + self.max_memory_mb = max_memory_mb def initialize(self): self.server = libtmux.Server() - window_command = '/bin/bash' + _shell_command = '/bin/bash' if self.username in ['root', 'openhands']: # This starts a non-login (new) shell for the given user - window_command = f'su {self.username} -' + _shell_command = f'su {self.username} -' # otherwise, we are running as the CURRENT USER (e.g., when running LocalRuntime) + if self.max_memory_mb is not None: + window_command = ( + f'prlimit --as={self.max_memory_mb * 1024 * 1024} {_shell_command}' + ) + else: + window_command = _shell_command + logger.debug(f'Initializing bash session with command: {window_command}') session_name = f'openhands-{self.username}-{uuid.uuid4()}' self.session = self.server.new_session( session_name=session_name, - window_name='bash', - window_command=window_command, start_directory=self.work_dir, kill_session=True, x=1000, @@ -207,6 +214,7 @@ def initialize(self): # We need to create a new pane because the initial pane's history limit is (default) 2000 _initial_window = self.session.attached_window self.window = self.session.new_window( + window_name='bash', window_shell=window_command, start_directory=self.work_dir, ) diff --git a/tests/runtime/test_runtime_resource.py b/tests/runtime/test_runtime_resource.py new file mode 100644 index 000000000000..2873939f132d --- /dev/null +++ b/tests/runtime/test_runtime_resource.py @@ -0,0 +1,113 @@ +"""Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" + +from conftest import _close_test_runtime, _load_runtime + +from openhands.core.logger import openhands_logger as logger +from openhands.events.action import CmdRunAction + + +def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1): + runtime, config = _load_runtime( + temp_dir, + runtime_cls, + docker_runtime_kwargs={ + 'cpu_period': 100000, # 100ms + 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU) + 'mem_limit': '4G', # 4 GB of memory + }, + ) + + action = CmdRunAction( + command='sudo apt-get update && sudo apt-get install -y stress-ng' + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + for _ in range(repeat): + # run stress-ng stress tests for 1 minute + action = CmdRunAction(command='stress-ng --all 1 -t 30s') + action.set_hard_timeout(120) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + + _close_test_runtime(runtime) + + +def test_stress_docker_runtime_hit_memory_limits(temp_dir, runtime_cls): + """Test runtime behavior under resource constraints.""" + runtime, config = _load_runtime( + temp_dir, + runtime_cls, + docker_runtime_kwargs={ + 'cpu_period': 100000, # 100ms + 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU) + 'mem_limit': '4G', # 4 GB of memory + 'memswap_limit': '0', # No swap + 'mem_swappiness': 0, # Disable swapping + 'oom_kill_disable': False, # Enable OOM killer + }, + runtime_startup_env_vars={ + 'RUNTIME_MAX_MEMORY_GB': '3', + }, + ) + + action = CmdRunAction( + command='sudo apt-get update && sudo apt-get install -y stress-ng' + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + action = CmdRunAction( + command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics' + ) + action.set_hard_timeout(120) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert 'aborted early, out of system resources' in obs.content + assert obs.exit_code == 3 # OOM killed! + + _close_test_runtime(runtime) + + +def test_stress_docker_runtime_within_memory_limits(temp_dir, runtime_cls): + """Test runtime behavior under resource constraints.""" + runtime, config = _load_runtime( + temp_dir, + runtime_cls, + docker_runtime_kwargs={ + 'cpu_period': 100000, # 100ms + 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU) + 'mem_limit': '4G', # 4 GB of memory + 'memswap_limit': '0', # No swap + 'mem_swappiness': 0, # Disable swapping + 'oom_kill_disable': False, # Enable OOM killer + }, + runtime_startup_env_vars={ + 'RUNTIME_MAX_MEMORY_GB': '7', + }, + ) + + action = CmdRunAction( + command='sudo apt-get update && sudo apt-get install -y stress-ng' + ) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + action = CmdRunAction( + command='stress-ng --vm 1 --vm-bytes 6G --timeout 30s --metrics' + ) + action.set_hard_timeout(120) + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + assert obs.exit_code == 0 + + _close_test_runtime(runtime) diff --git a/tests/runtime/test_stress_docker_runtime.py b/tests/runtime/test_stress_docker_runtime.py deleted file mode 100644 index b679a0836253..000000000000 --- a/tests/runtime/test_stress_docker_runtime.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Stress tests for the DockerRuntime, which connects to the ActionExecutor running in the sandbox.""" - -from conftest import _close_test_runtime, _load_runtime - -from openhands.core.logger import openhands_logger as logger -from openhands.events.action import CmdRunAction - - -def test_stress_docker_runtime(temp_dir, runtime_cls, repeat=1): - runtime, config = _load_runtime( - temp_dir, - runtime_cls, - docker_runtime_kwargs={ - 'cpu_period': 100000, # 100ms - 'cpu_quota': 100000, # Can use 100ms out of each 100ms period (1 CPU) - 'mem_limit': '4G', # 4 GB of memory - }, - ) - - action = CmdRunAction( - command='sudo apt-get update && sudo apt-get install -y stress-ng' - ) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - assert obs.exit_code == 0 - - for _ in range(repeat): - # run stress-ng stress tests for 1 minute - action = CmdRunAction(command='stress-ng --all 1 -t 1m') - action.set_hard_timeout(120) - logger.info(action, extra={'msg_type': 'ACTION'}) - obs = runtime.run_action(action) - logger.info(obs, extra={'msg_type': 'OBSERVATION'}) - - _close_test_runtime(runtime)