Skip to content

refactor/fix: store dists in parse_requirements output #1917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jun 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d65b3fb
parse_requirements: add whls and sdists attr
aignas May 22, 2024
1d7be8d
refactor: split out a function for selecting a list of wheels instead…
aignas May 23, 2024
50ff113
Handle correctly when the target platform has a python version in the…
aignas May 23, 2024
823cc03
add a note
aignas May 23, 2024
cb1ecd1
wip
aignas May 23, 2024
1800f77
docs: add bzl_library targets
aignas May 23, 2024
6dff723
fixes for #1930
aignas May 30, 2024
552c526
filter out non cp or py wheels
aignas May 30, 2024
348ca06
rewrite the filtering algorithm and harden the code
aignas May 31, 2024
341cf84
Merge branch 'main' into refactor/store-dists-in-reqs
aignas May 31, 2024
0261471
minor cleanup
aignas May 31, 2024
098835f
add a logger to the pip extension
aignas May 31, 2024
900e617
comment: add a logger to repo_utils and accept a lambda instead of st…
aignas Jun 1, 2024
90eaf3b
comment: clarify docstring
aignas Jun 1, 2024
aef1bb8
comment: clarify parameter s/want_version/want_python_version
aignas Jun 1, 2024
6dc2e62
comment: cryptic comment
aignas Jun 1, 2024
adc7708
comment: set usage
aignas Jun 1, 2024
24fa6a5
comment: describe why musl is special cased
aignas Jun 1, 2024
dec7455
comment: why we are getting sdists[0]
aignas Jun 1, 2024
2cd9502
comment: the add_dists is too coupled
aignas Jun 1, 2024
fed02eb
cleanup
aignas Jun 1, 2024
6e4ee51
fixup tests
aignas Jun 1, 2024
c9158aa
fixup debugging statements
aignas Jun 1, 2024
48b8a71
finish the debugging setup
aignas Jun 1, 2024
0fad448
cleanup bzl_library
aignas Jun 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ A brief description of the categories of changes:
"panic: runtime error: invalid memory address or nil pointer dereference"
* (bzlmod) remove `pip.parse(annotations)` attribute as it is unused and has been
replaced by whl_modifications.
* (pip) Correctly select wheels when the python tag includes minor versions.
See ([#1930](https://github.com/bazelbuild/rules_python/issues/1930))

### Added
* (rules) Precompiling Python source at build time is available. but is
Expand Down
12 changes: 11 additions & 1 deletion python/pip_install/pip_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,17 @@ python_interpreter. An example value: "@python3_x86_64-unknown-linux-gnu//:pytho
),
"quiet": attr.bool(
default = True,
doc = "If True, suppress printing stdout and stderr output to the terminal.",
doc = """\
If True, suppress printing stdout and stderr output to the terminal.

If you would like to get more diagnostic output, please use:

RULES_PYTHON_REPO_DEBUG=1

or

RULES_PYTHON_REPO_DEBUG_VERBOSITY=<INFO|DEBUG|TRACE>
""",
),
"repo_prefix": attr.string(
doc = """
Expand Down
82 changes: 34 additions & 48 deletions python/private/bzlmod/pip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ load("//python/private:parse_requirements.bzl", "host_platform", "parse_requirem
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
load("//python/private:pypi_index.bzl", "simpleapi_download")
load("//python/private:render_pkg_aliases.bzl", "whl_alias")
load("//python/private:repo_utils.bzl", "repo_utils")
load("//python/private:version_label.bzl", "version_label")
load("//python/private:whl_target_platforms.bzl", "select_whl")
load(":pip_repository.bzl", "pip_repository")
Expand Down Expand Up @@ -100,6 +101,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
)

def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, simpleapi_cache):
logger = repo_utils.logger(module_ctx)
python_interpreter_target = pip_attr.python_interpreter_target

# if we do not have the python_interpreter set in the attributes
Expand Down Expand Up @@ -160,32 +162,18 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s

# Create a new wheel library for each of the different whls

requirements_by_platform = parse_requirements(
module_ctx,
requirements_by_platform = pip_attr.requirements_by_platform,
requirements_linux = pip_attr.requirements_linux,
requirements_lock = pip_attr.requirements_lock,
requirements_osx = pip_attr.requirements_darwin,
requirements_windows = pip_attr.requirements_windows,
extra_pip_args = pip_attr.extra_pip_args,
)

index_urls = {}
get_index_urls = None
if pip_attr.experimental_index_url:
if pip_attr.download_only:
fail("Currently unsupported to use `download_only` and `experimental_index_url`")

index_urls = simpleapi_download(
module_ctx,
get_index_urls = lambda ctx, distributions: simpleapi_download(
ctx,
attr = struct(
index_url = pip_attr.experimental_index_url,
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
sources = list({
req.distribution: None
for reqs in requirements_by_platform.values()
for req in reqs
}),
sources = distributions,
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
Expand All @@ -195,6 +183,19 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
parallel_download = pip_attr.parallel_download,
)

requirements_by_platform = parse_requirements(
module_ctx,
requirements_by_platform = pip_attr.requirements_by_platform,
requirements_linux = pip_attr.requirements_linux,
requirements_lock = pip_attr.requirements_lock,
requirements_osx = pip_attr.requirements_darwin,
requirements_windows = pip_attr.requirements_windows,
extra_pip_args = pip_attr.extra_pip_args,
get_index_urls = get_index_urls,
python_version = major_minor,
logger = logger,
)

repository_platform = host_platform(module_ctx.os)
for whl_name, requirements in requirements_by_platform.items():
requirement = select_requirement(
Expand Down Expand Up @@ -255,37 +256,22 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
)
whl_library_args.update({k: v for k, (v, default) in maybe_args_with_default.items() if v == default})

if index_urls:
whls = []
sdist = None
for sha256 in requirement.srcs.shas:
# For now if the artifact is marked as yanked we just ignore it.
#
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api

maybe_whl = index_urls[whl_name].whls.get(sha256)
if maybe_whl and not maybe_whl.yanked:
whls.append(maybe_whl)
continue

maybe_sdist = index_urls[whl_name].sdists.get(sha256)
if maybe_sdist and not maybe_sdist.yanked:
sdist = maybe_sdist
continue

print("WARNING: Could not find a whl or an sdist with sha256={}".format(sha256)) # buildifier: disable=print

if requirement.whls or requirement.sdist:
logger.debug(lambda: "Selecting a compatible dist for {} from dists:\n{}".format(
repository_platform,
json.encode(
struct(
whls = requirement.whls,
sdist = requirement.sdist,
),
),
))
distribution = select_whl(
whls = whls,
want_abis = [
"none",
"abi3",
"cp" + major_minor.replace(".", ""),
# Older python versions have wheels for the `*m` ABI.
"cp" + major_minor.replace(".", "") + "m",
],
whls = requirement.whls,
want_platform = repository_platform,
) or sdist
) or requirement.sdist

logger.debug(lambda: "Selected: {}".format(distribution))

if distribution:
whl_library_args["requirement"] = requirement.srcs.requirement
Expand All @@ -303,7 +289,7 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, group_map, s
# This is no-op because pip is not used to download the wheel.
whl_library_args.pop("download_only", None)
else:
print("WARNING: falling back to pip for installing the right file for {}".format(requirement.requirement_line)) # buildifier: disable=print
logger.warn("falling back to pip for installing the right file for {}".format(requirement.requirement_line))

# We sort so that the lock-file remains the same no matter the order of how the
# args are manipulated in the code going before.
Expand Down
123 changes: 109 additions & 14 deletions python/private/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ behavior.
load("//python/pip_install:requirements_parser.bzl", "parse")
load(":normalize_name.bzl", "normalize_name")
load(":pypi_index_sources.bzl", "get_simpleapi_sources")
load(":whl_target_platforms.bzl", "whl_target_platforms")
load(":whl_target_platforms.bzl", "select_whls", "whl_target_platforms")

# This includes the vendored _translate_cpu and _translate_os from
# @platforms//host:extension.bzl at version 0.0.9 so that we don't
Expand Down Expand Up @@ -84,6 +84,11 @@ def _default_platforms(*, filter):
if not filter:
fail("Must specific a filter string, got: {}".format(filter))

if filter.startswith("cp3"):
# TODO @aignas 2024-05-23: properly handle python versions in the filter.
# For now we are just dropping it to ensure that we don't fail.
_, _, filter = filter.partition("_")

sanitized = filter.replace("*", "").replace("_", "")
if sanitized and not sanitized.isalnum():
fail("The platform filter can only contain '*', '_' and alphanumerics")
Expand Down Expand Up @@ -142,6 +147,9 @@ def parse_requirements(
requirements_lock = None,
requirements_windows = None,
extra_pip_args = [],
get_index_urls = None,
python_version = None,
logger = None,
fail_fn = fail):
"""Get the requirements with platforms that the requirements apply to.

Expand All @@ -156,6 +164,12 @@ def parse_requirements(
requirements_windows (label): The requirements file for windows OS.
extra_pip_args (string list): Extra pip arguments to perform extra validations and to
be joined with args defined in files.
get_index_urls: Callable[[ctx, list[str]], dict], a callable to get all
of the distribution URLs from a PyPI index. Accepts ctx and
distribution names to query.
python_version: str or None. This is needed when the get_index_urls is
specified. It should be of the form "3.x.x",
logger: repo_utils.logger or None, a simple struct to log diagnostic messages.
fail_fn (Callable[[str], None]): A failure function used in testing failure cases.

Returns:
Expand Down Expand Up @@ -312,20 +326,46 @@ def parse_requirements(
)
for_req.target_platforms.append(target_platform)

return {
whl_name: [
struct(
distribution = r.distribution,
srcs = r.srcs,
requirement_line = r.requirement_line,
target_platforms = sorted(r.target_platforms),
extra_pip_args = r.extra_pip_args,
download = r.download,
index_urls = {}
if get_index_urls:
if not python_version:
fail_fn("'python_version' must be provided")
return None

index_urls = get_index_urls(
ctx,
# Use list({}) as a way to have a set
list({
req.distribution: None
for reqs in requirements_by_platform.values()
for req in reqs.values()
}),
)

ret = {}
for whl_name, reqs in requirements_by_platform.items():
for r in sorted(reqs.values(), key = lambda r: r.requirement_line):
whls, sdist = _add_dists(
r,
index_urls.get(whl_name),
python_version = python_version,
logger = logger,
)
for r in sorted(reqs.values(), key = lambda r: r.requirement_line)
]
for whl_name, reqs in requirements_by_platform.items()
}

ret.setdefault(whl_name, []).append(
struct(
distribution = r.distribution,
srcs = r.srcs,
requirement_line = r.requirement_line,
target_platforms = sorted(r.target_platforms),
extra_pip_args = r.extra_pip_args,
download = r.download,
whls = whls,
sdist = sdist,
),
)

return ret

def select_requirement(requirements, *, platform):
"""A simple function to get a requirement for a particular platform.
Expand Down Expand Up @@ -372,3 +412,58 @@ def host_platform(repository_os):
_translate_os(repository_os.name.lower()),
_translate_cpu(repository_os.arch.lower()),
)

def _add_dists(requirement, index_urls, python_version, logger = None):
    """Collect the wheels and the sdist of a requirement from PyPI index data.

    Every sha256 in `requirement.srcs.shas` is looked up first among the wheels
    and then among the sdists of `index_urls`. Yanked artifacts are skipped.
    The wheels that were found are then narrowed down via `select_whls`.

    Args:
        requirement: A single requirement entry; its `srcs.shas` and
            `target_platforms` fields are read.
        index_urls: The PyPI index data for this distribution, exposing `whls`
            and `sdists` mappings keyed by sha256. May be falsy when no index
            data is available.
        python_version: str, the version of the python interpreter. It is used
            to derive the wanted `cp*` ABI tags and is forwarded to
            `select_whls`.
        logger: An optional logger for printing diagnostic info.

    Returns:
        A tuple `(whls, sdist)`: `whls` is the result of `select_whls` and
        `sdist` is the last matching non-yanked sdist or None. When
        `index_urls` is falsy, `([], None)` is returned.
    """
    if not index_urls:
        return [], None

    found_whls = []
    found_sdist = None

    # TODO @aignas 2024-05-22: it is in theory possible to add all
    # requirements by version instead of by sha256. This may be useful
    # for some projects.
    for sha256 in requirement.srcs.shas:
        # For now if the artifact is marked as yanked we just ignore it.
        #
        # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
        whl = index_urls.whls.get(sha256)
        if whl and not whl.yanked:
            found_whls.append(whl)
        else:
            # Only consult the sdists when no usable wheel has this sha256.
            src = index_urls.sdists.get(sha256)
            if src and not src.yanked:
                found_sdist = src
            elif logger:
                logger.warn("Could not find a whl or an sdist with sha256={}".format(sha256))

    # E.g. "3.11" becomes "cp311".
    cp_abi = "cp" + python_version.replace(".", "")

    # Filter out the wheels that are incompatible with the target_platforms.
    compatible_whls = select_whls(
        whls = found_whls,
        want_abis = [
            "none",
            "abi3",
            cp_abi,
            # Older python versions have wheels for the `*m` ABI.
            cp_abi + "m",
        ],
        want_platforms = requirement.target_platforms,
        want_python_version = python_version,
        logger = logger,
    )

    return compatible_whls, found_sdist
26 changes: 25 additions & 1 deletion python/private/parse_whl_name.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@
A starlark implementation of a Wheel filename parsing.
"""

# Legacy manylinux platform tags and their modern spelling.
# Taken from https://peps.python.org/pep-0600/
_LEGACY_ALIASES = {
    "manylinux1_i686": "manylinux_2_5_i686",
    "manylinux1_x86_64": "manylinux_2_5_x86_64",
    "manylinux2010_i686": "manylinux_2_12_i686",
    "manylinux2010_x86_64": "manylinux_2_12_x86_64",
    "manylinux2014_aarch64": "manylinux_2_17_aarch64",
    "manylinux2014_armv7l": "manylinux_2_17_armv7l",
    "manylinux2014_i686": "manylinux_2_17_i686",
    "manylinux2014_ppc64": "manylinux_2_17_ppc64",
    "manylinux2014_ppc64le": "manylinux_2_17_ppc64le",
    "manylinux2014_s390x": "manylinux_2_17_s390x",
    "manylinux2014_x86_64": "manylinux_2_17_x86_64",
}

def normalize_platform_tag(tag):
    """Resolve legacy aliases to modern equivalents for easier parsing elsewhere.

    Args:
        tag: str, a platform tag; several tags may be joined with ".".

    Returns:
        The "."-joined tags with every legacy alias replaced by its modern
        form. Duplicates that appear after the replacement are dropped and
        the order of first occurrence is kept.
    """

    # The dict is used as a string set: it deduplicates the tags while
    # retaining the order in which they were first seen.
    normalized = {}
    for part in tag.split("."):
        normalized[_LEGACY_ALIASES.get(part, part)] = None

    return ".".join(normalized.keys())

def parse_whl_name(file):
"""Parse whl file name into a struct of constituents.

Expand Down Expand Up @@ -68,5 +92,5 @@ def parse_whl_name(file):
build_tag = build_tag,
python_tag = python_tag,
abi_tag = abi_tag,
platform_tag = platform_tag,
platform_tag = normalize_platform_tag(platform_tag),
)
Loading
Loading