Skip to content

Commit

Permalink
Refactor SandboxConfig to use get_default_sandbox_config_for_eval
Browse files Browse the repository at this point in the history
  • Loading branch information
openhands-agent committed Feb 13, 2025
1 parent 8242721 commit fcba1af
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 37 deletions.
8 changes: 2 additions & 6 deletions evaluation/benchmarks/bird/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
SandboxConfig,
get_llm_config_arg,
parse_arguments,
get_default_sandbox_config_for_eval,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
Expand Down Expand Up @@ -76,12 +77,7 @@ def get_config(
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
sandbox=get_default_sandbox_config_for_eval(),
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
Expand Down
28 changes: 10 additions & 18 deletions evaluation/benchmarks/commit0_bench/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
SandboxConfig,
get_llm_config_arg,
get_parser,
get_default_sandbox_config_for_eval,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
Expand Down Expand Up @@ -105,38 +106,29 @@ def get_config(
instance: pd.Series,
metadata: EvalMetadata,
) -> AppConfig:
# COMMIT0_CONTAINER_IMAGE = 'wentingzhao/'
assert USE_INSTANCE_IMAGE
# We use a different instance image for each instance of commit0 eval
repo_name = instance['repo'].split('/')[1]
base_container_image = get_instance_docker_image(repo_name)
logger.info(
f'Using instance container image: {base_container_image}. '
f'Please make sure this image exists. '
f'Submit an issue on https://github.com/All-Hands-AI/OpenHands if you run into any issues.'
)
# else:
# raise
# base_container_image = SWE_BENCH_CONTAINER_IMAGE
# logger.info(f'Using swe-bench container image: {base_container_image}')

sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = base_container_image
sandbox_config.timeout = 300 # large enough timeout, since some testcases take very long to run
sandbox_config.api_key = os.environ.get('ALLHANDS_API_KEY', None)
sandbox_config.remote_runtime_api_url = os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL')
sandbox_config.keep_runtime_alive = False
sandbox_config.remote_runtime_init_timeout = 3600

config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
max_iterations=metadata.max_iterations,
runtime=os.environ.get('RUNTIME', 'docker'),
sandbox=SandboxConfig(
base_container_image=base_container_image,
enable_auto_lint=True,
use_host_network=False,
# large enough timeout, since some testcases take very long to run
timeout=300,
api_key=os.environ.get('ALLHANDS_API_KEY', None),
remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
keep_runtime_alive=False,
remote_runtime_init_timeout=3600,
remote_runtime_enable_retries=True,
),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
Expand Down
13 changes: 6 additions & 7 deletions evaluation/benchmarks/mint/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
SandboxConfig,
get_llm_config_arg,
get_parser,
get_default_sandbox_config_for_eval,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
Expand Down Expand Up @@ -103,18 +104,16 @@ def load_incontext_example(task_name: str, with_tool: bool = True):
def get_config(
metadata: EvalMetadata,
) -> AppConfig:
sandbox_config = get_default_sandbox_config_for_eval()
sandbox_config.base_container_image = 'xingyaoww/od-eval-mint:v1.0'
sandbox_config.runtime_extra_deps = f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}'

config = AppConfig(
default_agent=metadata.agent_class,
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='xingyaoww/od-eval-mint:v1.0',
enable_auto_lint=True,
use_host_network=False,
runtime_extra_deps=f'$OH_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
remote_runtime_enable_retries=True,
),
sandbox=sandbox_config,
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
Expand Down
8 changes: 2 additions & 6 deletions evaluation/benchmarks/toolqa/run_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
SandboxConfig,
get_llm_config_arg,
get_parser,
get_default_sandbox_config_for_eval,
)
from openhands.core.logger import openhands_logger as logger
from openhands.core.main import create_runtime, run_controller
Expand All @@ -46,12 +47,7 @@ def get_config(
run_as_openhands=False,
runtime='docker',
max_iterations=metadata.max_iterations,
sandbox=SandboxConfig(
base_container_image='python:3.12-bookworm',
enable_auto_lint=True,
use_host_network=False,
remote_runtime_enable_retries=True,
),
sandbox=get_default_sandbox_config_for_eval(),
# do not mount workspace
workspace_base=None,
workspace_mount_path=None,
Expand Down

0 comments on commit fcba1af

Please sign in to comment.