-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathddg_search.py
140 lines (113 loc) · 4.81 KB
/
ddg_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import asyncio
from typing import List, Dict
from utils.browsing import BrowserManager
class DuckDuckGoSearch:
    """Simple utility class for searching DuckDuckGo.

    All work is delegated to a ``BrowserManager`` held in
    ``self.browser_manager``. The manager is created either explicitly via
    ``initialize()`` / ``async with`` or lazily on the first search call, and
    is released by ``cleanup()``.
    """

    def __init__(self) -> None:
        """Create the search utility; no browser manager is created yet."""
        # None means "not initialized"; the search methods check this to
        # decide whether they must call initialize() first.
        self.browser_manager = None

    async def __aenter__(self) -> "DuckDuckGoSearch":
        """Async context manager entry: initialize and return this object."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async context manager exit: release resources.

        Returns None, so an exception raised inside the ``async with`` body
        is not suppressed.
        """
        await self.cleanup()

    async def initialize(self) -> "DuckDuckGoSearch":
        """Create a ``BrowserManager``, initialize it, and return ``self``."""
        self.browser_manager = BrowserManager()
        await self.browser_manager.initialize()
        return self

    async def cleanup(self) -> None:
        """Clean up the browser manager, if there is one.

        The reference is cleared before the manager's own ``cleanup()`` is
        awaited. That way a second ``cleanup()`` call is a no-op, and a later
        search call sees ``browser_manager is None`` and initializes a fresh
        manager instead of reusing one that was already cleaned up.
        """
        manager, self.browser_manager = self.browser_manager, None
        if manager is not None:
            await manager.cleanup()

    async def search_html(self, query: str, num_results: int = 10) -> List[Dict]:
        """
        Search DuckDuckGo HTML version and return results.

        Initializes the browser manager first if that has not happened yet,
        then delegates to ``browser_manager.search_duckduckgo``.

        Args:
            query: Search query string
            num_results: Maximum number of results to return

        Returns:
            List of search results as dictionaries with keys:
            - title: Result title
            - link: Result URL
            - snippet: Result description
            - displayLink: Display URL
        """
        if self.browser_manager is None:
            await self.initialize()
        return await self.browser_manager.search_duckduckgo(query, num_results)

    async def search_regular(self, query: str, num_results: int = 10) -> List[Dict]:
        """
        Search regular DuckDuckGo (not HTML-only version) and return results.

        Initializes the browser manager first if that has not happened yet,
        then delegates to ``browser_manager.search_duckduckgo_regular``.

        Args:
            query: Search query string
            num_results: Maximum number of results to return

        Returns:
            List of search results as dictionaries with keys:
            - title: Result title
            - link: Result URL
            - snippet: Result description
            - displayLink: Display URL
        """
        if self.browser_manager is None:
            await self.initialize()
        return await self.browser_manager.search_duckduckgo_regular(query, num_results)

    async def search(self, query: str, num_results: int = 10) -> List[Dict]:
        """
        Search DuckDuckGo with fallback mechanism:
        1. Try regular DuckDuckGo first
        2. If that fails or returns nothing, try HTML DuckDuckGo

        Args:
            query: Search query string
            num_results: Maximum number of results to return

        Returns:
            List of search results as dictionaries; an empty list when the
            HTML fallback raises as well.
        """
        # Initialize outside the try blocks below so that a failure to start
        # the browser manager propagates to the caller instead of being
        # reported as a failed search.
        if self.browser_manager is None:
            await self.initialize()

        # Try regular DuckDuckGo first; an empty result also triggers the
        # fallback.
        try:
            results = await self.search_regular(query, num_results)
            if results:
                return results
        except Exception as e:
            print(f"Regular DuckDuckGo search failed: {e}")

        # Fall back to HTML DuckDuckGo
        try:
            return await self.search_html(query, num_results)
        except Exception as e:
            print(f"HTML DuckDuckGo search failed: {e}")
            return []  # Return empty list if both methods fail
async def main():
    """Demo: run the same query through each search method and print the hits."""
    async with DuckDuckGoSearch() as ddg:
        query = "weather"

        # Each entry pairs the banner to print with the coroutine to await,
        # in the order: regular, HTML-only, combined with fallback.
        demos = (
            (f"\n--- Regular DuckDuckGo Search for '{query}' ---", ddg.search_regular),
            (f"\n--- HTML DuckDuckGo Search for '{query}' ---", ddg.search_html),
            (f"\n--- Combined Search with Fallback for '{query}' ---", ddg.search),
        )

        for banner, run_search in demos:
            print(banner)
            hits = await run_search(query, num_results=3)
            for position, hit in enumerate(hits, 1):
                print(f"\nResult {position}:")
                print(f"Title: {hit['title']}")
                print(f"URL: {hit['link']}")
                print(f"Snippet: {hit['snippet']}")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())