Skip to content

Commit c383c3b

Browse files
authored
fix(pypi): make the URL/filename extraction from requirement more robust (#2871)
Summary: - Make the requirement line the same as the one that is used in whls. It only contains extras and the version if it is present. - Add debug log statements if we fail to get the version from a direct URL reference. - Move some tests from `parse_requirements_tests` to `index_sources_tests` to improve test maintenance. - Replace the URL encoded `+` to a regular `+` in the filename. - Correctly handle the case when the `=sha256:` is used in the URL. Once this is merged I plan to tackle #2648 by changing the `parse_requirements` code to de-duplicate entries returned by the `parse_requirements` function. I cannot think of anything else that we can do for this as of now, so will mark the associated issue as resolved. Fixes #2363 Work towards #2648
1 parent e54060b commit c383c3b

File tree

10 files changed

+132
-301
lines changed

10 files changed

+132
-301
lines changed

.editorconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ max_line_length = 100
1515
[*.{py,bzl}]
1616
indent_style = space
1717
indent_size = 4
18+
19+
# different overrides for git commit messages
20+
[.git/COMMIT_EDITMSG]
21+
max_line_length = 72

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ END_UNRELEASED_TEMPLATE
8686
multiple times.
8787
* (tools/wheelmaker.py) Extras are now preserved in Requires-Dist metadata when using requires_file
8888
to specify the requirements.
89+
* (pypi) Use bazel downloader for direct URL references and correctly detect the filenames from
90+
various URL formats - URL encoded version strings get correctly resolved, sha256 value can be
91+
also retrieved from the URL as opposed to only the `--hash` parameter. Fixes
92+
[#2363](https://github.com/bazel-contrib/rules_python/issues/2363).
8993

9094
{#v0-0-0-added}
9195
### Added

python/private/pypi/extension.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patt
301301
# This is no-op because pip is not used to download the wheel.
302302
args.pop("download_only", None)
303303

304-
args["requirement"] = requirement.srcs.requirement
304+
args["requirement"] = requirement.line
305305
args["urls"] = [distribution.url]
306306
args["sha256"] = distribution.sha256
307307
args["filename"] = distribution.filename
@@ -338,7 +338,7 @@ def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patt
338338

339339
# Fallback to a pip-installed wheel
340340
args = dict(whl_library_args) # make a copy
341-
args["requirement"] = requirement.srcs.requirement_line
341+
args["requirement"] = requirement.line
342342
if requirement.extra_pip_args:
343343
args["extra_pip_args"] = requirement.extra_pip_args
344344

python/private/pypi/index_sources.bzl

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,23 @@
1616
A file that houses private functions used in the `bzlmod` extension with the same name.
1717
"""
1818

19+
# Just list them here and me super conservative
20+
_KNOWN_EXTS = [
21+
# Note, the following source in pip has more extensions
22+
# https://github.com/pypa/pip/blob/3c5a189141a965f21a473e46c3107e689eb9f79f/src/pip/_vendor/distlib/locators.py#L90
23+
#
24+
# ".tar.bz2",
25+
# ".tar",
26+
# ".tgz",
27+
# ".tbz",
28+
#
29+
# But we support only the following, used in 'packaging'
30+
# https://github.com/pypa/pip/blob/3c5a189141a965f21a473e46c3107e689eb9f79f/src/pip/_vendor/packaging/utils.py#L137
31+
".whl",
32+
".tar.gz",
33+
".zip",
34+
]
35+
1936
def index_sources(line):
2037
"""Get PyPI sources from a requirements.txt line.
2138
@@ -58,16 +75,37 @@ def index_sources(line):
5875
).strip()
5976

6077
url = ""
78+
filename = ""
6179
if "@" in head:
62-
requirement = requirement_line
63-
_, _, url_and_rest = requirement.partition("@")
80+
maybe_requirement, _, url_and_rest = requirement.partition("@")
6481
url = url_and_rest.strip().partition(" ")[0].strip()
6582

83+
url, _, sha256 = url.partition("#sha256=")
84+
if sha256:
85+
shas.append(sha256)
86+
_, _, filename = url.rpartition("/")
87+
88+
# Replace URL encoded characters and luckily there is only one case
89+
filename = filename.replace("%2B", "+")
90+
is_known_ext = False
91+
for ext in _KNOWN_EXTS:
92+
if filename.endswith(ext):
93+
is_known_ext = True
94+
break
95+
96+
if is_known_ext:
97+
requirement = maybe_requirement.strip()
98+
else:
99+
# could not detect filename from the URL
100+
filename = ""
101+
requirement = requirement_line
102+
66103
return struct(
67104
requirement = requirement,
68105
requirement_line = requirement_line,
69106
version = version,
70107
shas = sorted(shas),
71108
marker = marker,
72109
url = url,
110+
filename = filename,
73111
)

python/private/pypi/parse_requirements.bzl

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def parse_requirements(
4040
extra_pip_args = [],
4141
get_index_urls = None,
4242
evaluate_markers = None,
43+
extract_url_srcs = True,
4344
logger = None):
4445
"""Get the requirements with platforms that the requirements apply to.
4546
@@ -58,6 +59,8 @@ def parse_requirements(
5859
the platforms stored as values in the input dict. Returns the same
5960
dict, but with values being platforms that are compatible with the
6061
requirements line.
62+
extract_url_srcs: A boolean to enable extracting URLs from requirement
63+
lines to enable using bazel downloader.
6164
logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
6265
6366
Returns:
@@ -206,7 +209,7 @@ def parse_requirements(
206209
ret_requirements.append(
207210
struct(
208211
distribution = r.distribution,
209-
srcs = r.srcs,
212+
line = r.srcs.requirement if extract_url_srcs and (whls or sdist) else r.srcs.requirement_line,
210213
target_platforms = sorted(target_platforms),
211214
extra_pip_args = r.extra_pip_args,
212215
whls = whls,
@@ -281,32 +284,26 @@ def _add_dists(*, requirement, index_urls, logger = None):
281284
logger: A logger for printing diagnostic info.
282285
"""
283286

284-
# Handle direct URLs in requirements
285287
if requirement.srcs.url:
286-
url = requirement.srcs.url
287-
_, _, filename = url.rpartition("/")
288-
filename, _, _ = filename.partition("#sha256=")
289-
if "." not in filename:
290-
# detected filename has no extension, it might be an sdist ref
291-
# TODO @aignas 2025-04-03: should be handled if the following is fixed:
292-
# https://github.com/bazel-contrib/rules_python/issues/2363
288+
if not requirement.srcs.filename:
289+
if logger:
290+
logger.debug(lambda: "Could not detect the filename from the URL, falling back to pip: {}".format(
291+
requirement.srcs.url,
292+
))
293293
return [], None
294294

295-
if "@" in filename:
296-
# this is most likely foo.git@git_sha, skip special handling of these
297-
return [], None
298-
299-
direct_url_dist = struct(
300-
url = url,
301-
filename = filename,
295+
# Handle direct URLs in requirements
296+
dist = struct(
297+
url = requirement.srcs.url,
298+
filename = requirement.srcs.filename,
302299
sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "",
303300
yanked = False,
304301
)
305302

306-
if filename.endswith(".whl"):
307-
return [direct_url_dist], None
303+
if dist.filename.endswith(".whl"):
304+
return [dist], None
308305
else:
309-
return [], direct_url_dist
306+
return [], dist
310307

311308
if not index_urls:
312309
return [], None

python/private/pypi/pip_repository.bzl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,20 @@ def _pip_repository_impl(rctx):
8989
python_interpreter_target = rctx.attr.python_interpreter_target,
9090
srcs = rctx.attr._evaluate_markers_srcs,
9191
),
92+
extract_url_srcs = False,
9293
)
9394
selected_requirements = {}
9495
options = None
9596
repository_platform = host_platform(rctx)
9697
for name, requirements in requirements_by_platform.items():
97-
r = select_requirement(
98+
requirement = select_requirement(
9899
requirements,
99100
platform = None if rctx.attr.download_only else repository_platform,
100101
)
101-
if not r:
102+
if not requirement:
102103
continue
103-
options = options or r.extra_pip_args
104-
selected_requirements[name] = r.srcs.requirement_line
104+
options = options or requirement.extra_pip_args
105+
selected_requirements[name] = requirement.line
105106

106107
bzl_packages = sorted(selected_requirements.keys())
107108

python/private/pypi/whl_library.bzl

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -258,19 +258,9 @@ def _whl_library_impl(rctx):
258258
# Simulate the behaviour where the whl is present in the current directory.
259259
rctx.symlink(whl_path, whl_path.basename)
260260
whl_path = rctx.path(whl_path.basename)
261-
elif rctx.attr.urls:
261+
elif rctx.attr.urls and rctx.attr.filename:
262262
filename = rctx.attr.filename
263263
urls = rctx.attr.urls
264-
if not filename:
265-
_, _, filename = urls[0].rpartition("/")
266-
267-
if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")):
268-
if rctx.attr.filename:
269-
msg = "got '{}'".format(filename)
270-
else:
271-
msg = "detected '{}' from url:\n{}".format(filename, urls[0])
272-
fail("Only '.whl', '.tar.gz' or '.zip' files are supported, {}".format(msg))
273-
274264
result = rctx.download(
275265
url = urls,
276266
output = filename,

tests/pypi/extension/extension_tests.bzl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef
806806
"extra_pip_args": ["--extra-args-for-sdist-building"],
807807
"filename": "any-name.tar.gz",
808808
"python_interpreter_target": "unit_test_interpreter_target",
809-
"requirement": "direct_sdist_without_sha @ some-archive/any-name.tar.gz",
809+
"requirement": "direct_sdist_without_sha",
810810
"sha256": "",
811811
"urls": ["some-archive/any-name.tar.gz"],
812812
},
@@ -824,7 +824,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef
824824
],
825825
"filename": "direct_without_sha-0.0.1-py3-none-any.whl",
826826
"python_interpreter_target": "unit_test_interpreter_target",
827-
"requirement": "direct_without_sha==0.0.1 @ example-direct.org/direct_without_sha-0.0.1-py3-none-any.whl",
827+
"requirement": "direct_without_sha==0.0.1",
828828
"sha256": "",
829829
"urls": ["example-direct.org/direct_without_sha-0.0.1-py3-none-any.whl"],
830830
},
@@ -891,7 +891,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef
891891
],
892892
"filename": "some_pkg-0.0.1-py3-none-any.whl",
893893
"python_interpreter_target": "unit_test_interpreter_target",
894-
"requirement": "some_pkg==0.0.1 @ example-direct.org/some_pkg-0.0.1-py3-none-any.whl --hash=sha256:deadbaaf",
894+
"requirement": "some_pkg==0.0.1",
895895
"sha256": "deadbaaf",
896896
"urls": ["example-direct.org/some_pkg-0.0.1-py3-none-any.whl"],
897897
},

tests/pypi/index_sources/index_sources_tests.bzl

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,52 +23,83 @@ def _test_no_simple_api_sources(env):
2323
inputs = {
2424
"foo @ git+https://github.com/org/foo.git@deadbeef": struct(
2525
requirement = "foo @ git+https://github.com/org/foo.git@deadbeef",
26+
requirement_line = "foo @ git+https://github.com/org/foo.git@deadbeef",
2627
marker = "",
2728
url = "git+https://github.com/org/foo.git@deadbeef",
2829
shas = [],
2930
version = "",
31+
filename = "",
3032
),
3133
"foo==0.0.1": struct(
3234
requirement = "foo==0.0.1",
35+
requirement_line = "foo==0.0.1",
3336
marker = "",
3437
url = "",
3538
version = "0.0.1",
39+
filename = "",
3640
),
3741
"foo==0.0.1 @ https://someurl.org": struct(
3842
requirement = "foo==0.0.1 @ https://someurl.org",
43+
requirement_line = "foo==0.0.1 @ https://someurl.org",
3944
marker = "",
4045
url = "https://someurl.org",
4146
version = "0.0.1",
47+
filename = "",
4248
),
4349
"foo==0.0.1 @ https://someurl.org/package.whl": struct(
44-
requirement = "foo==0.0.1 @ https://someurl.org/package.whl",
50+
requirement = "foo==0.0.1",
51+
requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl",
4552
marker = "",
4653
url = "https://someurl.org/package.whl",
4754
version = "0.0.1",
55+
filename = "package.whl",
4856
),
4957
"foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef": struct(
50-
requirement = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef",
58+
requirement = "foo==0.0.1",
59+
requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef",
5160
marker = "",
5261
url = "https://someurl.org/package.whl",
5362
shas = ["deadbeef"],
5463
version = "0.0.1",
64+
filename = "package.whl",
5565
),
5666
"foo==0.0.1 @ https://someurl.org/package.whl; python_version < \"2.7\"\\ --hash=sha256:deadbeef": struct(
57-
requirement = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef",
67+
requirement = "foo==0.0.1",
68+
requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef",
5869
marker = "python_version < \"2.7\"",
5970
url = "https://someurl.org/package.whl",
6071
shas = ["deadbeef"],
6172
version = "0.0.1",
73+
filename = "package.whl",
74+
),
75+
"foo[extra] @ https://example.org/foo-1.0.tar.gz --hash=sha256:deadbe0f": struct(
76+
requirement = "foo[extra]",
77+
requirement_line = "foo[extra] @ https://example.org/foo-1.0.tar.gz --hash=sha256:deadbe0f",
78+
marker = "",
79+
url = "https://example.org/foo-1.0.tar.gz",
80+
shas = ["deadbe0f"],
81+
version = "",
82+
filename = "foo-1.0.tar.gz",
83+
),
84+
"torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=deadbeef": struct(
85+
requirement = "torch",
86+
requirement_line = "torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=deadbeef",
87+
marker = "",
88+
url = "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl",
89+
shas = ["deadbeef"],
90+
version = "",
91+
filename = "torch-2.6.0+cpu-cp311-cp311-linux_x86_64.whl",
6292
),
6393
}
6494
for input, want in inputs.items():
6595
got = index_sources(input)
6696
env.expect.that_collection(got.shas).contains_exactly(want.shas if hasattr(want, "shas") else [])
6797
env.expect.that_str(got.version).equals(want.version)
6898
env.expect.that_str(got.requirement).equals(want.requirement)
69-
env.expect.that_str(got.requirement_line).equals(got.requirement)
99+
env.expect.that_str(got.requirement_line).equals(got.requirement_line)
70100
env.expect.that_str(got.marker).equals(want.marker)
71101
env.expect.that_str(got.url).equals(want.url)
102+
env.expect.that_str(got.filename).equals(want.filename)
72103

73104
_tests.append(_test_no_simple_api_sources)
74105

0 commit comments

Comments
 (0)