Skip to content

Commit ae9d2b0

Browse files
update kwargs
1 parent 7068412 commit ae9d2b0

File tree

4 files changed

+157
-27
lines changed

4 files changed

+157
-27
lines changed

stagehand/handlers/act_handler.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,40 +35,83 @@ def __init__(
3535
self.user_provided_instructions = user_provided_instructions
3636
self.self_heal = self_heal
3737

38-
async def act(self, options: Union[ActOptions, ObserveResult]) -> ActResult:
38+
async def act(
39+
self,
40+
options_or_action: Union[ActOptions, ObserveResult, str, dict, None] = None,
41+
**kwargs
42+
) -> ActResult:
3943
"""
4044
Perform an act based on an instruction.
4145
This method will observe the page and then perform the act on the first element returned.
46+
47+
Args:
48+
options_or_action: ActOptions, ObserveResult, action string, dict, or None
49+
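**kwargs: Extra ActOptions fields merged over options_or_action (kwargs win on conflict); ignored when an ObserveResult or an ObserveResult-shaped dict is passed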
50+
51+
Returns:
52+
ActResult instance
4253
"""
43-
if "selector" in options and "method" in options:
44-
options = ObserveResult(**options)
54+
# Handle ObserveResult case first (legacy compatibility)
55+
if isinstance(options_or_action, ObserveResult):
4556
return await self._act_from_observe_result(
46-
options, self.stagehand.dom_settle_timeout_ms
57+
options_or_action, self.stagehand.dom_settle_timeout_ms
58+
)
59+
60+
# Handle the new flexible parameter format
61+
options: Optional[ActOptions] = None
62+
options_dict = {}
63+
64+
if isinstance(options_or_action, ActOptions):
65+
options_dict = options_or_action.model_dump()
66+
elif isinstance(options_or_action, dict):
67+
# Check if it's actually an ObserveResult dict
68+
if "selector" in options_or_action and "method" in options_or_action:
69+
observe_result = ObserveResult(**options_or_action)
70+
return await self._act_from_observe_result(
71+
observe_result, self.stagehand.dom_settle_timeout_ms
72+
)
73+
options_dict = options_or_action.copy()
74+
elif isinstance(options_or_action, str):
75+
options_dict["action"] = options_or_action
76+
77+
options_dict.update(kwargs)
78+
79+
# Validate options if we have any
80+
if options_dict:
81+
try:
82+
options = ActOptions(**options_dict)
83+
except Exception as e:
84+
self.logger.error(f"Invalid act options: {e}")
85+
raise
86+
87+
if not options or not options.action:
88+
return ActResult(
89+
success=False,
90+
message="No action provided for act operation",
91+
action="",
4792
)
4893

4994
# Start inference timer if available
5095
if hasattr(self.stagehand, "start_inference_timer"):
5196
self.stagehand.start_inference_timer()
5297

53-
action_task = options.get("action")
98+
action_task = options.action
5499
self.logger.info(
55100
f"Starting action for task: '{action_task}'",
56101
category="act",
57102
)
58103
prompt = build_act_observe_prompt(
59104
action=action_task,
60105
supported_actions=list(method_handler_map.keys()),
61-
variables=options.get("variables"),
106+
variables=options.variables,
62107
)
63108

64109
observe_options_dict = {"instruction": prompt}
65110
# Add other observe options from ActOptions if they exist
66-
if options.get("model_name"):
67-
observe_options_dict["model_name"] = options.get("model_name")
68-
if options.get("model_client_options"):
69-
observe_options_dict["model_client_options"] = options.get(
70-
"model_client_options"
71-
)
111+
if options.model_name:
112+
observe_options_dict["model_name"] = options.model_name
113+
if options.model_client_options:
114+
observe_options_dict["model_client_options"] = options.model_client_options
72115

73116
observe_options = ObserveOptions(**observe_options_dict)
74117

@@ -93,16 +136,16 @@ async def act(self, options: Union[ActOptions, ObserveResult]) -> ActResult:
93136
element_to_act_on = observe_results[0]
94137

95138
# Substitute variables in arguments
96-
if options.get("variables"):
97-
variables = options.get("variables", {})
139+
if options.variables:
140+
variables = options.variables
98141
element_to_act_on.arguments = [
99142
str(arg).replace(f"%{key}%", str(value))
100143
for arg in (element_to_act_on.arguments or [])
101144
for key, value in variables.items()
102145
]
103146

104147
# domSettleTimeoutMs might come from options if specified for act
105-
dom_settle_timeout_ms = options.get("dom_settle_timeout_ms")
148+
dom_settle_timeout_ms = options.dom_settle_timeout_ms
106149

107150
try:
108151
await self._perform_playwright_method(

stagehand/handlers/cua_handler.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import asyncio
22
import base64
3-
from typing import Any, Optional
3+
from typing import Any, Optional, Union
44

55
from ..types.agent import (
66
ActionExecutionResult,
@@ -33,8 +33,49 @@ async def get_screenshot_base64(self) -> str:
3333
screenshot_bytes = await self.page.screenshot(full_page=False, type="png")
3434
return base64.b64encode(screenshot_bytes).decode()
3535

36-
async def perform_action(self, action: AgentAction) -> ActionExecutionResult:
37-
"""Execute a single action on the page."""
36+
async def perform_action(
37+
self,
38+
action_or_dict: Union[AgentAction, dict, None] = None,
39+
**kwargs
40+
) -> ActionExecutionResult:
41+
"""
42+
Execute a single action on the page.
43+
44+
Args:
45+
action_or_dict: AgentAction, dict, or None
46+
**kwargs: Extra AgentAction fields merged over action_or_dict (kwargs win on conflict)
47+
48+
Returns:
49+
ActionExecutionResult dict
50+
"""
51+
action: Optional[AgentAction] = None
52+
action_dict = {}
53+
54+
if isinstance(action_or_dict, AgentAction):
55+
action_dict = action_or_dict.model_dump()
56+
elif isinstance(action_or_dict, dict):
57+
action_dict = action_or_dict.copy()
58+
59+
action_dict.update(kwargs)
60+
61+
# Validate action if we have any
62+
if action_dict:
63+
try:
64+
action = AgentAction(**action_dict)
65+
except Exception as e:
66+
self.logger.error(f"Invalid agent action: {e}")
67+
raise
68+
69+
if not action:
70+
self.logger.error(
71+
"No action provided for perform_action",
72+
category=StagehandFunctionName.AGENT,
73+
)
74+
return {
75+
"success": False,
76+
"error": "No action provided",
77+
}
78+
3879
self.logger.info(
3980
f"Performing action: {action.action.root if action.action else ''}",
4081
category=StagehandFunctionName.AGENT,

stagehand/handlers/extract_handler.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Extract handler for performing data extraction from page elements using LLMs."""
22

3-
from typing import Optional, TypeVar
3+
from typing import Optional, TypeVar, Union
44

55
from pydantic import BaseModel
66

@@ -34,19 +34,41 @@ def __init__(
3434

3535
async def extract(
3636
self,
37-
options: Optional[ExtractOptions] = None,
37+
options_or_instruction: Union[ExtractOptions, str, dict, None] = None,
3838
schema: Optional[type[BaseModel]] = None,
39+
**kwargs,
3940
) -> ExtractResult:
4041
"""
4142
Execute an extraction operation locally.
4243
4344
Args:
44-
options: ExtractOptions containing the instruction and other parameters
45+
options_or_instruction: ExtractOptions, instruction string, dict, or None
4546
schema: Optional Pydantic model for structured output
47+
**kwargs: Extra ExtractOptions fields merged over options_or_instruction (kwargs win on conflict)
4648
4749
Returns:
4850
ExtractResult instance
4951
"""
52+
options: Optional[ExtractOptions] = None
53+
options_dict = {}
54+
55+
if isinstance(options_or_instruction, ExtractOptions):
56+
options_dict = options_or_instruction.model_dump()
57+
elif isinstance(options_or_instruction, dict):
58+
options_dict = options_or_instruction.copy()
59+
elif isinstance(options_or_instruction, str):
60+
options_dict["instruction"] = options_or_instruction
61+
62+
options_dict.update(kwargs)
63+
64+
# Validate options if we have any
65+
if options_dict:
66+
try:
67+
options = ExtractOptions(**options_dict)
68+
except Exception as e:
69+
self.logger.error(f"Invalid extract options: {e}")
70+
raise
71+
5072
if not options:
5173
# If no options provided, extract the entire page text
5274
self.logger.info("Extracting entire page text")

stagehand/handlers/observe_handler.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Observe handler for performing observations of page elements using LLMs."""
22

3-
from typing import Any
3+
from typing import Any, Optional, Union
44

55
from stagehand.a11y.utils import get_accessibility_tree, get_xpath_by_resolved_object_id
66
from stagehand.llm.inference import observe as observe_inference
@@ -28,22 +28,45 @@ def __init__(
2828
self.logger = stagehand_client.logger
2929
self.user_provided_instructions = user_provided_instructions
3030

31-
# TODO: better kwargs
31+
3232
async def observe(
3333
self,
34-
options: ObserveOptions,
34+
options_or_instruction: Union[ObserveOptions, str, dict, None] = None,
3535
from_act: bool = False,
36+
**kwargs,
3637
) -> list[ObserveResult]:
3738
"""
3839
Execute an observation operation locally.
3940
4041
Args:
41-
options: ObserveOptions containing the instruction and other parameters
42+
options_or_instruction: ObserveOptions, instruction string, dict, or None
43+
from_act: Whether this observe is being called from act
44+
**kwargs: Extra ObserveOptions fields merged over options_or_instruction (kwargs win on conflict)
4245
4346
Returns:
4447
list of ObserveResult instances
4548
"""
46-
instruction = options.instruction
49+
options: Optional[ObserveOptions] = None
50+
options_dict = {}
51+
52+
if isinstance(options_or_instruction, ObserveOptions):
53+
options_dict = options_or_instruction.model_dump()
54+
elif isinstance(options_or_instruction, dict):
55+
options_dict = options_or_instruction.copy()
56+
elif isinstance(options_or_instruction, str):
57+
options_dict["instruction"] = options_or_instruction
58+
59+
options_dict.update(kwargs)
60+
61+
# Validate options if we have any
62+
if options_dict:
63+
try:
64+
options = ObserveOptions(**options_dict)
65+
except Exception as e:
66+
self.logger.error(f"Invalid observe options: {e}")
67+
raise
68+
69+
instruction = options.instruction if options else None
4770
if not instruction:
4871
instruction = (
4972
"Find elements that can be used for any future actions in the page. "
@@ -117,7 +140,8 @@ async def observe(
117140
)
118141

119142
# Draw overlay if requested
120-
if options.draw_overlay:
143+
draw_overlay = options.draw_overlay if options else False
144+
if draw_overlay:
121145
await draw_observe_overlay(self.stagehand_page, elements_with_selectors)
122146

123147
# Return the list of results without trying to attach _llm_response

0 commit comments

Comments
 (0)