All-Hands-AI · li-boxuan · Feb 15, 2025 · Feb 2, 2025 · Feb 2, 2025 · Feb 2, 2025
diff --git a/evaluation/benchmarks/the_agent_company/run_infer.py b/evaluation/benchmarks/the_agent_company/run_infer.py
@@ -21,6 +21,7 @@
     get_llm_config_arg,
     get_parser,
 )
+from openhands.core.config.agent_config import AgentConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.main import create_runtime, run_controller
 from openhands.events.action import CmdRunAction, MessageAction
@@ -34,6 +35,7 @@ def get_config(
     task_short_name: str,
     mount_path_on_host: str,
     llm_config: LLMConfig,
+    agent_config: AgentConfig,
 ) -> AppConfig:
     config = AppConfig(
         run_as_openhands=False,
@@ -57,6 +59,7 @@ def get_config(
         workspace_mount_path_in_sandbox='/outputs',
     )
     config.set_llm_config(llm_config)
+    config.set_agent_config(agent_config)
     return config
 
 
@@ -152,6 +155,12 @@ def run_solver(
                     os.path.join(screenshots_dir, f'{image_id}.png'), 'wb'
                 ) as file:
                     file.write(image_data)
+                if obs.set_of_marks:
+                    som_image_data = base64.b64decode(obs.set_of_marks)
+                    with open(
+                        os.path.join(screenshots_dir, f'{image_id}_som.png'), 'wb'
+                    ) as file:
+                        file.write(som_image_data)
 
     if save_final_state:
         os.makedirs(state_dir, exist_ok=True)
@@ -214,6 +223,14 @@ def run_evaluator(
     )
     args, _ = parser.parse_known_args()
 
+    #### TODO: parse agent_config from CLI args and TOML instead of hard-coding ####
+    agent_config: AgentConfig | None = None
+    agent_config = AgentConfig(
+        enable_som_visual_browsing=True,
+        disabled_microagents=['github'],
+    )
+    ############################################
+
     agent_llm_config: LLMConfig | None = None
     if args.agent_llm_config:
         agent_llm_config = get_llm_config_arg(args.agent_llm_config)
@@ -254,7 +271,7 @@ def run_evaluator(
     else:
         temp_dir = tempfile.mkdtemp()
     config: AppConfig = get_config(
-        args.task_image_name, task_short_name, temp_dir, agent_llm_config
+        args.task_image_name, task_short_name, temp_dir, agent_llm_config, agent_config
     )
     runtime: Runtime = create_runtime(config)
     call_async_from_sync(runtime.connect)