forked from openai/openai-cua-sample-app
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple_cua_loop.py
95 lines (73 loc) · 3.17 KB
/
simple_cua_loop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from computers import Computer
from computers import LocalPlaywrightComputer
from utils import create_response, check_blocklisted_url
def acknowledge_safety_check_callback(message: str) -> bool:
    """Ask the operator on the terminal whether to proceed past a safety check.

    Args:
        message: Safety-check text shown to the operator in the prompt.

    Returns:
        True only when the reply, lower-cased and stripped of surrounding
        whitespace, is exactly ``"y"``; False for anything else.
    """
    prompt = f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    answer = input(prompt)
    # Normalise case first, then whitespace, so " Y " counts as a yes.
    normalized = answer.lower().strip()
    return normalized == "y"
def handle_item(item, computer: Computer):
    """Handle one model output item; may cause a computer action + screenshot.

    Args:
        item: One dict from the model response's ``output`` list. Only the
            ``"message"`` and ``"computer_call"`` types are acted upon.
        computer: Environment the action is dispatched to. The action's
            ``"type"`` is used as the method name and its remaining keys are
            passed as keyword arguments.

    Returns:
        A one-element list holding the ``computer_call_output`` dict for a
        ``computer_call`` item, otherwise an empty list.

    Raises:
        ValueError: If the user declines a pending safety check. The action
            is NOT performed in that case.
    """
    item_type = item["type"]

    if item_type == "message":  # print messages
        print(item["content"][0]["text"])

    if item_type != "computer_call":
        return []

    # perform computer actions
    action = item["action"]
    action_type = action["type"]
    action_args = {k: v for k, v in action.items() if k != "type"}
    print(f"{action_type}({action_args})")

    # Ask for acknowledgement BEFORE touching the environment: a declined
    # check must stop the action from running, not merely abort afterwards.
    pending_checks = item.get("pending_safety_checks", [])
    for check in pending_checks:
        if not acknowledge_safety_check_callback(check["message"]):
            raise ValueError(f"Safety check failed: {check['message']}")

    # give our computer environment action to perform
    getattr(computer, action_type)(**action_args)

    screenshot_base64 = computer.screenshot()

    # return value informs model of the latest screenshot
    call_output = {
        "type": "computer_call_output",
        "call_id": item["call_id"],
        "acknowledged_safety_checks": pending_checks,
        "output": {
            "type": "input_image",
            "image_url": f"data:image/png;base64,{screenshot_base64}",
        },
    }

    # additional URL safety checks for browser environments
    if computer.environment == "browser":
        current_url = computer.get_current_url()
        call_output["output"]["current_url"] = current_url
        # NOTE(review): check_blocklisted_url is defined in utils; presumably
        # it raises for a blocked URL - confirm against that module.
        check_blocklisted_url(current_url)

    return [call_output]
def main():
    """Run the CUA (Computer Use Assistant) loop, using Local Playwright.

    Opens a ``LocalPlaywrightComputer``, then repeatedly reads a user prompt
    and keeps calling the model, feeding back the outputs of ``handle_item``,
    until the newest conversation item has the ``"assistant"`` role.

    Raises:
        ValueError: If a model response has no ``"output"`` key.
    """
    with LocalPlaywrightComputer() as computer:
        # Single tool definition describing the controlled display.
        computer_tool = {
            "type": "computer-preview",
            "display_width": computer.dimensions[0],
            "display_height": computer.dimensions[1],
            "environment": computer.environment,
        }
        tools = [computer_tool]

        # Full conversation history, resent to the model on every request.
        items = []

        while True:  # get user input forever
            items.append({"role": "user", "content": input("> ")})

            turn_finished = False
            while not turn_finished:  # keep looping until we get a final response
                response = create_response(
                    model="computer-use-preview-2025-02-04",
                    input=items,
                    tools=tools,
                    truncation="auto",
                )

                if "output" not in response:
                    print(response)
                    raise ValueError("No output from model")

                model_output = response["output"]
                items.extend(model_output)

                # Each handled item may add a computer_call_output to the history.
                for entry in model_output:
                    items.extend(handle_item(entry, computer))

                turn_finished = items[-1].get("role") == "assistant"
# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()