-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrowser.py
78 lines (72 loc) · 2.27 KB
/
browser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from botasaurus.browser import browser, Driver
from botasaurus.user_agent import UserAgent
from botasaurus.window_size import WindowSize
from botasaurus.user_agent import UserAgent
from botasaurus.window_size import WindowSize
from chrome_extension_python import Extension
from botasaurus.lang import Lang
import typer
from callback import get_home_page
from tool import get_meta_data
@browser(
    extensions=[
        Extension(
            # Alternative extension URL, currently disabled:
            # "https://chromewebstore.google.com/detail/adblock-%E2%80%94-best-ad-blocker/gighmmpiobklfepjocnamgkkbiglidom"
            "https://chromewebstore.google.com/detail/ublock-origin/cjpalhdlnbpafiamejdnhcphjbkeiagm"
        )
    ],
    output=None,
    reuse_driver=True,
    # run_async=True,
    lang=Lang.Chinese,
    add_arguments=["--mute-audio"],
    close_on_crash=True,
    raise_exception=True,
    tiny_profile=True,
    # The options below are callables that look a value up by key;
    # presumably botasaurus calls each one with the task's data dict
    # (the dict built in run_browser) -- confirm against botasaurus docs.
    headless=lambda cfg: cfg["headless"],
    profile=lambda cfg: cfg["profile"],
    user_agent=lambda cfg: cfg["user_agent"],
    window_size=lambda cfg: cfg["window_size"],
    proxy=lambda cfg: cfg["proxy"],
    wait_for_complete_page_load=False,
    max_retry=3,
)
def browser_driver(driver: Driver, data):
    """Run the callback stored in ``data`` and collect page metadata.

    Args:
        driver: The browser driver handed to both the callback and
            ``get_meta_data``.
        data: Mapping that must contain a ``"callback"`` entry; that
            callable is invoked as ``callback(driver, data)``.

    Returns:
        A two-element list: the callback's return value followed by the
        result of ``get_meta_data(driver)``.
    """
    task_callback = data["callback"]
    # The callback runs first; metadata is gathered only afterwards.
    callback_result = task_callback(driver, data)
    page_meta = get_meta_data(driver)
    return [callback_result, page_meta]
def run_browser(
    url, callback: any = get_home_page, headless: bool = False, output_dir: str = ""
):
    """
    Call ``browser_driver`` for ``url`` using the first predefined profile.

    Two profile configurations ("pikachu" and "pikachu2") are built, but a
    second profile is not needed at the moment, so only the first is used.

    Args:
        url: Stored under the ``"url"`` key of the task data.
        callback: Stored under ``"callback"``; ``browser_driver`` invokes it
            as ``callback(driver, data)``.
        headless: Stored under ``"headless"``, which the ``headless`` option
            of the decorator on ``browser_driver`` looks up.
        output_dir: Stored under ``"output_dir"`` of the task data.

    Returns:
        Whatever the decorated ``browser_driver`` returns for the chosen
        profile.
    """
    # NOTE(review): `any` in the signature is the builtin function, not
    # typing.Any; it is left as-is because typer inspects the signature.
    profile_names = ("pikachu", "pikachu2")
    candidates = [
        {
            "url": url,
            "callback": callback,
            "profile": profile_name,
            "headless": headless,
            "user_agent": UserAgent.HASHED,
            "window_size": WindowSize.HASHED,
            "proxy": "http://127.0.0.1:7890",
            "output_dir": output_dir,
            # Not read anywhere in this file; presumably consumed by the
            # callback -- verify against its implementation.
            "waitTime": 2,
            "timeout": 120,
        }
        for profile_name in profile_names
    ]
    # Only the first profile is used for now.
    chosen = candidates[0]
    return browser_driver(chosen)
if __name__ == "__main__":
    # When executed as a script, hand run_browser to typer so it is driven
    # from the command line.
    typer.run(run_browser)