Skip to content

Commit 1caefe4

Browse files
committed
Enhance capture_page function with improved error handling and page load management
1 parent 6e30df1 commit 1caefe4

File tree

1 file changed

+63
-71
lines changed

1 file changed

+63
-71
lines changed

src/modules/apps/__init__.py

+63 -71
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,16 @@
11
import gradio as gr
22
import tempfile
33
import os
4+
import time
5+
import traceback
46
from selenium import webdriver
57
from selenium.webdriver.chrome.options import Options
68
from selenium.webdriver.chrome.service import Service
79
from selenium.webdriver.support.ui import WebDriverWait
810
from selenium.webdriver.support import expected_conditions as EC
11+
from selenium.common.exceptions import TimeoutException
912
from subprocess import PIPE, STDOUT
10-
import traceback
13+
import psutil
1114

1215
print("Gradio app loaded.")
1316

@@ -20,67 +23,65 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
2023
"""
2124
options = Options()
2225

23-
# Basic options
24-
options.add_argument('--headless=new') # New headless mode
25-
options.add_argument('--no-sandbox') # Required in Docker
26-
options.add_argument('--disable-dev-shm-usage') # Required in Docker
27-
28-
# Performance and stability options
29-
options.add_argument('--disable-gpu') # Required in Docker
26+
# Use new headless mode and basic options for Docker
27+
options.add_argument('--headless=new')
28+
options.add_argument('--no-sandbox')
29+
options.add_argument('--disable-dev-shm-usage')
30+
options.add_argument('--disable-gpu')
3031
options.add_argument('--disable-software-rasterizer')
3132
options.add_argument('--disable-extensions')
3233
options.add_argument('--disable-infobars')
33-
34-
# Resource configuration
3534
options.add_argument('--window-size=1920,1080')
3635
options.add_argument('--disable-features=NetworkService,NetworkServiceInProcess')
3736
options.add_argument('--disable-features=site-per-process')
38-
39-
# Memory and process settings
40-
options.add_argument('--single-process') # Run in single process mode
37+
options.add_argument('--single-process')
4138
options.add_argument('--memory-pressure-off')
4239
options.add_argument('--disable-crash-reporter')
43-
options.add_argument('--disable-breakpad') # Disable crash reporting
44-
45-
# Additional stability options
40+
options.add_argument('--disable-breakpad')
4641
options.add_argument('--ignore-certificate-errors')
4742
options.add_argument('--disable-setuid-sandbox')
4843
options.add_argument('--disable-web-security')
49-
50-
# Set specific shared memory /dev/shm size (if needed)
51-
options.add_argument('--disable-dev-shm-usage')
5244
options.add_argument('--shm-size=2g')
5345

54-
# Set up Chrome service with explicit path to chromedriver and logging
46+
# Set page load strategy to 'none' to avoid waiting indefinitely
47+
options.page_load_strategy = "none"
48+
49+
# Set up Chrome service (ensure chromedriver is in your PATH)
5550
service = Service(
56-
# executable_path='/usr/local/bin/chromedriver',
57-
log_output=PIPE, # Redirect logs to pipe
58-
service_args=['--verbose'] # Enable verbose logging
51+
log_output=PIPE,
52+
service_args=['--verbose']
5953
)
6054

55+
driver = None
6156
try:
6257
print("Initializing Chrome...")
63-
driver = webdriver.Chrome(
64-
service=service,
65-
options=options
66-
)
67-
58+
driver = webdriver.Chrome(service=service, options=options)
59+
if not driver:
60+
raise Exception("Failed to initialize Chrome driver")
61+
6862
print("Chrome initialized successfully")
63+
driver.implicitly_wait(5)
6964

7065
try:
71-
print(f"Navigating to URL: {url}")
72-
driver.get(url)
66+
# Set a 30-second timeout for page load
67+
driver.set_page_load_timeout(30)
68+
try:
69+
print(f"Navigating to URL: {url}")
70+
driver.get(url)
71+
except TimeoutException:
72+
print("Page load timed out. Proceeding with screenshot capture...")
7373

74-
# Wait for page load
75-
print("Waiting for page to load...")
76-
driver.implicitly_wait(10) # Increased wait time
74+
# Wait for the document ready state to be 'interactive' or 'complete'
75+
try:
76+
print("Waiting for document ready state...")
77+
WebDriverWait(driver, 30).until(
78+
lambda d: d.execute_script('return document.readyState') in ["interactive", "complete"]
79+
)
80+
except TimeoutException:
81+
print("Document did not reach ready state within timeout, proceeding anyway.")
7782

78-
# Additional wait for dynamic content
79-
from selenium.webdriver.support.ui import WebDriverWait
80-
from selenium.webdriver.support import expected_conditions as EC
81-
# WebDriverWait(driver, 10).until(
82-
# lambda d: d.execute_script('return document.readyState') == 'complete'
83-
# )
83+
# Additional short delay to allow dynamic content to settle
84+
time.sleep(2)
8485

8586
print("Taking screenshot...")
8687
driver.save_screenshot(output_file)
@@ -92,18 +93,22 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
9293
raise
9394
finally:
9495
print("Closing Chrome...")
95-
try:
96-
driver.close() # Close current window
97-
driver.quit() # Quit browser completely
98-
import psutil # For process cleanup
99-
current_pid = os.getpid()
100-
current_process = psutil.Process(current_pid)
101-
children = current_process.children(recursive=True)
102-
for child in children:
103-
if 'chrome' in child.name().lower():
104-
child.terminate()
105-
except Exception as cleanup_error:
106-
print(f"Error during cleanup: {cleanup_error}")
96+
# Wrap cleanup in a try/except to prevent errors if the session is already closed
97+
if driver:
98+
try:
99+
driver.quit()
100+
except Exception as cleanup_error:
101+
print(f"Error during driver.quit(): {cleanup_error}")
102+
103+
# Optionally clean up any lingering Chrome processes
104+
try:
105+
current_pid = os.getpid()
106+
current_process = psutil.Process(current_pid)
107+
for child in current_process.children(recursive=True):
108+
if 'chrome' in child.name().lower():
109+
child.terminate()
110+
except Exception as psutil_error:
111+
print(f"Error during process cleanup: {psutil_error}")
107112

108113
except Exception as e:
109114
print(f"Error initializing Chrome: {str(e)}")
@@ -112,33 +117,24 @@ def capture_page(url: str, output_file: str = "screenshot.png"):
112117
def capture_and_show(url: str):
113118
"""Capture webpage and return the image"""
114119
try:
115-
# Get the temporary directory path (defaulting to /tmp if TMPDIR is not set)
116120
temp_dir = os.getenv('TMPDIR', '/tmp')
117-
118121
try:
119-
# Ensure temp directory exists and has correct permissions
120122
os.makedirs(temp_dir, mode=0o777, exist_ok=True)
121123
print(f"Using temp directory: {temp_dir}")
122-
123-
# Verify directory is writable
124124
if not os.access(temp_dir, os.W_OK):
125125
print(f"Warning: Temp directory {temp_dir} is not writable")
126-
# Try to create a user-specific temp directory instead
127126
temp_dir = os.path.join('/tmp', f'chrome_screenshots_{os.getuid()}')
128127
os.makedirs(temp_dir, mode=0o777, exist_ok=True)
129128
print(f"Created user-specific temp directory: {temp_dir}")
130129

131-
# Create temporary file in the specified directory
132130
temp_path = os.path.join(temp_dir, f"screenshot_{os.urandom(8).hex()}.png")
133131
print(f"Temp file path: {temp_path}")
134132

135-
# Capture the webpage
136133
success = capture_page(url, temp_path)
137134
if not success:
138135
print("Screenshot capture returned False")
139136
return None
140-
141-
# Verify file was created
137+
142138
if not os.path.exists(temp_path):
143139
print("Screenshot file was not created")
144140
return None
@@ -183,7 +179,6 @@ def create_gradio_app():
183179

184180
def capture_with_error(url):
185181
try:
186-
# Basic URL validation
187182
if not url:
188183
return None, gr.update(visible=True, value="Please enter a URL")
189184
if not url.startswith(('http://', 'https://')):
@@ -196,7 +191,6 @@ def capture_with_error(url):
196191
except Exception as e:
197192
return None, gr.update(visible=True, value=f"Error: {str(e)}")
198193

199-
# Connect the components
200194
capture_btn.click(
201195
fn=capture_with_error,
202196
inputs=[url_input],
@@ -207,21 +201,19 @@ def capture_with_error(url):
207201

208202
app = create_gradio_app()
209203

210-
# Configure server settings for Docker deployment
211-
server_port = 7860 # Standard Gradio port
212-
server_name = "0.0.0.0" # Allow external connections
204+
server_port = 7860
205+
server_name = "0.0.0.0"
213206

214207
def main():
215-
"""Launch the Gradio application"""
216208
print("Starting Gradio server...")
217209
app.launch(
218210
server_name=server_name,
219211
server_port=server_port,
220-
share=False, # Disable sharing as we're running in Docker
221-
auth=None, # Can be configured if authentication is needed
222-
ssl_verify=False, # Disable SSL verification for internal Docker network
212+
share=False,
213+
auth=None,
214+
ssl_verify=False,
223215
show_error=True,
224216
favicon_path=None
225217
)
226218

227-
main()
219+
main()

0 commit comments

Comments
 (0)