Skip to content

Commit 453afc0

Browse files
authored
Merge pull request #141 from aryanA101a/useragent
set a user-agent for handle_user_provided_file
2 parents 538f8af + f114d38 commit 453afc0

File tree

4 files changed

+64
-1
lines changed

4 files changed

+64
-1
lines changed

src/zimscraperlib/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
ROOT_DIR = pathlib.Path(__file__).parent
1111
NAME = pathlib.Path(__file__).parent.name
1212
SCRAPER = f"{NAME} {__version__}"
13+
CONTACT = "dev@openzim.org"
14+
DEFAULT_USER_AGENT = f"{NAME}/{__version__} ({CONTACT})"
1315

1416
UTF8 = "UTF-8"
1517

src/zimscraperlib/inputs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typing import Optional, Union
1010

1111
from zimscraperlib import logger
12+
from zimscraperlib.constants import DEFAULT_USER_AGENT
1213
from zimscraperlib.constants import (
1314
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
1415
)
@@ -23,6 +24,7 @@ def handle_user_provided_file(
2324
dest: Optional[pathlib.Path] = None,
2425
in_dir: Optional[pathlib.Path] = None,
2526
nocopy: bool = False, # noqa: FBT001, FBT002
27+
user_agent: Optional[str] = DEFAULT_USER_AGENT,
2628
) -> Union[pathlib.Path, None]:
2729
"""path to downloaded or copied a user provided file (URL or path)
2830
@@ -44,7 +46,8 @@ def handle_user_provided_file(
4446

4547
if str(source).startswith("http"):
4648
logger.debug(f"download {source} -> {dest}")
47-
stream_file(url=str(source), fpath=dest)
49+
headers = {"User-Agent": user_agent} if user_agent else None
50+
stream_file(url=str(source), fpath=dest, headers=headers)
4851
else:
4952
source = pathlib.Path(source).expanduser().resolve()
5053
if not source.exists():

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,11 @@ def webp_image():
120120
return file_src("ninja.webp")
121121

122122

123+
@pytest.fixture(scope="module")
124+
def valid_user_agent():
125+
return "name/version (contact)"
126+
127+
123128
@pytest.fixture(scope="session")
124129
def small_zim_file(tmpdir_factory):
125130
from zimscraperlib.download import stream_file

tests/inputs/test_inputs.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@
66

77
import pytest
88

9+
import zimscraperlib
10+
from zimscraperlib.constants import CONTACT
911
from zimscraperlib.constants import (
1012
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
1113
)
1214
from zimscraperlib.constants import (
1315
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
1416
)
17+
from zimscraperlib.constants import (
18+
NAME as PROJECT_NAME,
19+
)
1520
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file
1621

1722

@@ -80,6 +85,54 @@ def test_remote_indir(tmp_path, valid_http_url):
8085
assert fpath.parent == tmp_path
8186

8287

88+
def test_remote_default_user_agent(valid_http_url, monkeypatch):
89+
def mock_stream_file(**kwargs):
90+
headers = kwargs.get("headers")
91+
assert headers is not None
92+
user_agent = headers.get("User-Agent")
93+
assert isinstance(user_agent, str)
94+
assert user_agent.startswith(f"{PROJECT_NAME}/")
95+
assert user_agent.endswith(f"({CONTACT})")
96+
97+
monkeypatch.setattr(
98+
zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue]
99+
"stream_file",
100+
mock_stream_file,
101+
raising=True,
102+
)
103+
handle_user_provided_file(source=valid_http_url)
104+
105+
106+
def test_remote_provided_user_agent(valid_http_url, valid_user_agent, monkeypatch):
107+
def mock_stream_file(**kwargs):
108+
headers = kwargs.get("headers")
109+
assert headers is not None
110+
user_agent = headers.get("User-Agent")
111+
assert isinstance(user_agent, str)
112+
assert user_agent == valid_user_agent
113+
114+
monkeypatch.setattr(
115+
zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue]
116+
"stream_file",
117+
mock_stream_file,
118+
raising=True,
119+
)
120+
handle_user_provided_file(source=valid_http_url, user_agent=valid_user_agent)
121+
122+
123+
def test_remote_provided_none_user_agent(valid_http_url, monkeypatch):
124+
def mock_stream_file(**kwargs):
125+
assert kwargs.get("headers") is None
126+
127+
monkeypatch.setattr(
128+
zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue]
129+
"stream_file",
130+
mock_stream_file,
131+
raising=True,
132+
)
133+
handle_user_provided_file(source=valid_http_url, user_agent=None)
134+
135+
83136
TEXT_NOT_USED = "text not used"
84137

85138
LONG_TEXT = (

0 commit comments

Comments
 (0)