Skip to content

Commit f850887

Browse files
aryanA101a, benoit74
authored and committed
extended the handle_user_provided_file api to include user-agent support
1 parent 4d5d10e commit f850887

File tree

4 files changed

+50
-7
lines changed

4 files changed

+50
-7
lines changed

src/zimscraperlib/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
NAME = pathlib.Path(__file__).parent.name
1212
SCRAPER = f"{NAME} {__version__}"
1313
CONTACT = "[email protected]"
14+
DEFAULT_USER_AGENT = f"{NAME}/{__version__} ({CONTACT})"
1415

1516
UTF8 = "UTF-8"
1617

src/zimscraperlib/inputs.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,13 @@
99
from typing import Optional, Union
1010

1111
from zimscraperlib import logger
12-
from zimscraperlib.constants import (
13-
CONTACT,
14-
)
12+
from zimscraperlib.constants import DEFAULT_USER_AGENT
1513
from zimscraperlib.constants import (
1614
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
1715
)
1816
from zimscraperlib.constants import (
1917
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
2018
)
21-
from zimscraperlib.constants import (
22-
SCRAPER as PROJECT_NAME,
23-
)
2419
from zimscraperlib.download import stream_file
2520

2621

@@ -29,6 +24,7 @@ def handle_user_provided_file(
2924
dest: Optional[pathlib.Path] = None,
3025
in_dir: Optional[pathlib.Path] = None,
3126
nocopy: bool = False, # noqa: FBT001, FBT002
27+
user_agent: Optional[str] = DEFAULT_USER_AGENT,
3228
) -> Union[pathlib.Path, None]:
3329
"""path to downloaded or copied a user provided file (URL or path)
3430
@@ -50,7 +46,7 @@ def handle_user_provided_file(
5046

5147
if str(source).startswith("http"):
5248
logger.debug(f"download {source} -> {dest}")
53-
headers = {"User-Agent": f"{PROJECT_NAME.replace(' ','/')} ({CONTACT})"}
49+
headers = {"User-Agent": user_agent} if user_agent else None
5450
stream_file(url=str(source), fpath=dest, headers=headers)
5551
else:
5652
source = pathlib.Path(source).expanduser().resolve()

tests/conftest.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ def webp_image():
120120
return file_src("ninja.webp")
121121

122122

123+
@pytest.fixture(scope="module")
124+
def valid_user_agent():
125+
return "name/version (contact)"
126+
127+
128+
@pytest.fixture(scope="module")
129+
def invalid_user_agent():
130+
return "name version) (contact)"
131+
132+
123133
@pytest.fixture(scope="session")
124134
def small_zim_file(tmpdir_factory):
125135
from zimscraperlib.download import stream_file

tests/inputs/test_inputs.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@
66

77
import pytest
88

9+
import zimscraperlib
10+
from zimscraperlib.constants import CONTACT
911
from zimscraperlib.constants import (
1012
MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LENGTH,
1113
)
1214
from zimscraperlib.constants import (
1315
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LENGTH,
1416
)
17+
from zimscraperlib.constants import (
18+
NAME as PROJECT_NAME,
19+
)
1520
from zimscraperlib.inputs import compute_descriptions, handle_user_provided_file
1621

1722

@@ -80,6 +85,37 @@ def test_remote_indir(tmp_path, valid_http_url):
8085
assert fpath.parent == tmp_path
8186

8287

88+
def test_remote_default_user_agent(valid_http_url, monkeypatch):
89+
def mock_stream_file(**kwargs):
90+
headers = kwargs.get("headers")
91+
assert headers is not None
92+
user_agent = headers.get("User-Agent")
93+
assert isinstance(user_agent, str)
94+
assert user_agent.startswith(PROJECT_NAME)
95+
assert user_agent.endswith(f"({CONTACT})")
96+
97+
monkeypatch.setattr(
98+
zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue]
99+
"stream_file",
100+
mock_stream_file,
101+
raising=True,
102+
)
103+
handle_user_provided_file(source=valid_http_url)
104+
105+
106+
def test_remote_provided_none_user_agent(valid_http_url, monkeypatch):
107+
def mock_stream_file(**kwargs):
108+
assert kwargs.get("headers") is None
109+
110+
monkeypatch.setattr(
111+
zimscraperlib.inputs, # pyright: ignore[reportAttributeAccessIssue]
112+
"stream_file",
113+
mock_stream_file,
114+
raising=True,
115+
)
116+
handle_user_provided_file(source=valid_http_url, user_agent=None)
117+
118+
83119
TEXT_NOT_USED = "text not used"
84120

85121
LONG_TEXT = (

0 commit comments

Comments
 (0)