Skip to content

Commit 0f545c0

Browse files
committed
Review comments
1 parent d3d409a commit 0f545c0

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

src/crawlee/fingerprint_suite/_browserforge_adapter.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ def _get_single_browser_type(self, browser: Iterable[str | Browser] | None) -> s
136136
"""
137137
# In our case we never pass more than one browser type. In the general case, more browsers are just a bigger pool to
138138
# select from, so narrowing it to any of them is still a valid action as we are going to pick just one anyway.
139-
139+
if isinstance(browser, str):
140+
return browser
140141
if isinstance(browser, Iterable):
141142
choice = random.choice(
142143
[

tests/unit/crawlers/_playwright/test_playwright_crawler.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
)
3131
from crawlee.fingerprint_suite._browserforge_adapter import get_available_header_values
3232
from crawlee.fingerprint_suite._consts import BROWSER_TYPE_HEADER_KEYWORD
33+
from crawlee.fingerprint_suite._header_generator import fingerprint_browser_type_from_playwright_browser_type
3334
from crawlee.http_clients import HttpxHttpClient
3435
from crawlee.proxy_configuration import ProxyConfiguration
3536
from crawlee.sessions import Session, SessionPool
@@ -215,7 +216,10 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
215216

216217
user_agent = headers.get('user-agent')
217218
assert user_agent in get_available_header_values(header_network, {'user-agent', 'User-Agent'}), user_agent
218-
assert any(keyword in user_agent for keyword in BROWSER_TYPE_HEADER_KEYWORD['chrome']), user_agent
219+
assert any(
220+
keyword in user_agent
221+
for keyword in BROWSER_TYPE_HEADER_KEYWORD[fingerprint_browser_type_from_playwright_browser_type(browser_type)]
222+
), user_agent
219223

220224
assert headers.get('sec-ch-ua') in get_available_header_values(header_network, 'sec-ch-ua')
221225
assert headers.get('sec-ch-ua-mobile') in get_available_header_values(header_network, 'sec-ch-ua-mobile')
@@ -249,7 +253,10 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
249253

250254
user_agent = headers.get('user-agent')
251255
assert user_agent in get_available_header_values(header_network, {'user-agent', 'User-Agent'})
252-
assert any(keyword in user_agent for keyword in BROWSER_TYPE_HEADER_KEYWORD[browser_type])
256+
assert any(
257+
keyword in user_agent
258+
for keyword in BROWSER_TYPE_HEADER_KEYWORD[fingerprint_browser_type_from_playwright_browser_type(browser_type)]
259+
)
253260

254261

255262
async def test_custom_headers(server_url: URL) -> None:

tests/unit/fingerprint_suite/test_adapters.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
1+
from collections.abc import Iterable
2+
3+
import pytest
4+
from browserforge.headers import Browser
5+
16
from crawlee.fingerprint_suite import (
27
DefaultFingerprintGenerator,
38
HeaderGeneratorOptions,
49
ScreenOptions,
510
)
11+
from crawlee.fingerprint_suite._browserforge_adapter import PatchedHeaderGenerator
12+
from crawlee.fingerprint_suite._consts import BROWSER_TYPE_HEADER_KEYWORD
613

714

815
def test_fingerprint_generator_has_default() -> None:
@@ -64,3 +71,17 @@ def test_fingerprint_generator_all_options() -> None:
6471
assert 'Firefox' in fingerprint.navigator.userAgent
6572
assert 'Win' in fingerprint.navigator.oscpu
6673
assert 'en-US' in fingerprint.navigator.languages
74+
75+
76+
@pytest.mark.parametrize(
77+
'browser',
78+
[
79+
'firefox',
80+
['firefox'],
81+
[Browser(name='firefox')],
82+
],
83+
)
84+
def test_patched_header_generator_generate(browser: Iterable[str | Browser]) -> None:
85+
"""Test that PatchedHeaderGenerator works with all the possible types correctly."""
86+
header = PatchedHeaderGenerator().generate(browser=browser)
87+
assert any(keyword in header['User-Agent'] for keyword in BROWSER_TYPE_HEADER_KEYWORD['firefox'])

0 commit comments

Comments
 (0)