From ae475235b985f0e117ff3f48e827c4911b145755 Mon Sep 17 00:00:00 2001 From: vvincent1234 Date: Mon, 13 Jan 2025 21:31:37 +0800 Subject: [PATCH] update requirements --- requirements.txt | 4 +-- src/utils/utils.py | 65 +++++++++++++++++++++++----------------------- webui.py | 20 +++++++++----- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/requirements.txt b/requirements.txt index 852269e..faf4b2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -browser-use>=0.1.18 -langchain-google-genai>=2.0.8 +browser-use==0.1.18 +langchain-google-genai==2.0.8 pyperclip gradio langchain-ollama diff --git a/src/utils/utils.py b/src/utils/utils.py index 7e05dda..d5a8b25 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -164,38 +164,39 @@ def get_latest_files(directory: str, file_types: list = ['.webm', '.zip']) -> Di print(f"Error getting latest {file_type} file: {e}") return latest_files -async def capture_screenshot(browser_context) -> str: +async def capture_screenshot(browser_context): """Capture and encode a screenshot""" - try: - # Extract the Playwright browser instance - playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct. - - # Check if the browser instance is valid and if an existing context can be reused - if playwright_browser and playwright_browser.contexts: - playwright_context = playwright_browser.contexts[0] - - # Access pages in the context - if playwright_context: - pages = playwright_context.pages - - # Use an existing page or create a new one if none exist - if pages: - active_page = pages[0] - for page in pages: - if page.url != "about:blank": - active_page = page - - # Take screenshot - try: - screenshot = await active_page.screenshot( - type='jpeg', - quality=75, - scale="css" - ) - encoded = base64.b64encode(screenshot).decode('utf-8') - return f'' - except Exception as e: - return f"

Waiting for browser session...

" + # Extract the Playwright browser instance + playwright_browser = browser_context.browser.playwright_browser # Ensure this is correct. + + # Check if the browser instance is valid and if an existing context can be reused + if playwright_browser and playwright_browser.contexts: + playwright_context = playwright_browser.contexts[0] + else: + return None + # Access pages in the context + pages = None + if playwright_context: + pages = playwright_context.pages + + # Use an existing page or create a new one if none exist + if pages: + active_page = pages[0] + for page in pages: + if page.url != "about:blank": + active_page = page + else: + return None + + # Take screenshot + try: + screenshot = await active_page.screenshot( + type='jpeg', + quality=75, + scale="css" + ) + encoded = base64.b64encode(screenshot).decode('utf-8') + return encoded except Exception as e: - return f"

Waiting for browser session...

" + return None diff --git a/webui.py b/webui.py index 85d44a7..4e2791a 100644 --- a/webui.py +++ b/webui.py @@ -408,6 +408,8 @@ async def run_with_stream( max_actions_per_step, tool_call_in_content ): + stream_vw = 80 + stream_vh = int(80 * window_h // window_w) if not headless: result = await run_browser_agent( agent_type=agent_type, @@ -433,7 +435,7 @@ async def run_with_stream( tool_call_in_content=tool_call_in_content ) # Add HTML content at the start of the result array - html_content = "

Using browser...

" + html_content = f"

Using browser...

" yield [html_content] + list(result) else: try: @@ -465,7 +467,7 @@ async def run_with_stream( ) # Initialize values for streaming - html_content = "

Using browser...

" + html_content = f"

Using browser...

" final_result = errors = model_actions = model_thoughts = "" latest_videos = trace = None @@ -473,9 +475,13 @@ async def run_with_stream( # Periodically update the stream while the agent task is running while not agent_task.done(): try: - html_content = await capture_screenshot(_global_browser_context) + encoded_screenshot = await capture_screenshot(_global_browser_context) + if encoded_screenshot is not None: + html_content = f'' + else: + html_content = f"

Waiting for browser session...

" except Exception as e: - html_content = f"

Waiting for browser session...

" + html_content = f"

Waiting for browser session...

" yield [ html_content, @@ -488,7 +494,7 @@ async def run_with_stream( gr.update(value="Stop", interactive=True), # Re-enable stop button gr.update(value="Run", interactive=True) # Re-enable run button ] - await asyncio.sleep(0.01) + await asyncio.sleep(0.05) # Once the agent task completes, get the results try: @@ -515,7 +521,7 @@ async def run_with_stream( except Exception as e: import traceback yield [ - f"

Waiting for browser session...

", + f"

Waiting for browser session...

", "", f"Error: {str(e)}\n{traceback.format_exc()}", "", @@ -740,7 +746,7 @@ def create_ui(theme_name="Ocean"): with gr.Row(): browser_view = gr.HTML( - value="

Waiting for browser session...

", + value="

Waiting for browser session...

", label="Live Browser View", )