Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP refactor(pypi): pep508 parsing in starlark #2629

Draft
wants to merge 47 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
07fedac
feat(pep508): initial parsing support
aignas Feb 13, 2025
ae8ccfc
start reimplementation
aignas Feb 13, 2025
bccecf2
continue with the tests
aignas Feb 13, 2025
d4173e1
add python_version arg to deps
aignas Feb 14, 2025
84ba241
wip
aignas Feb 14, 2025
d844dd0
wip
aignas Feb 14, 2025
d11bd2b
wip
aignas Feb 23, 2025
743b992
wip
aignas Feb 23, 2025
9aa2a3f
stringify platforms later
aignas Feb 23, 2025
3f56e1c
one more test passing
aignas Feb 23, 2025
5b12849
common deps addition
aignas Feb 23, 2025
4a22828
handle self edges
aignas Feb 23, 2025
b68db5b
self deps and specific python version deps
aignas Feb 23, 2025
8190c3a
single version test
aignas Feb 23, 2025
82cc18b
one more test is passing
aignas Feb 23, 2025
e26232c
simplifying of deps
aignas Feb 23, 2025
31161db
wip
aignas Feb 23, 2025
0a191ae
fix the impl
aignas Feb 23, 2025
973ff06
last test passes
aignas Feb 23, 2025
df60414
upgrade the copyright
aignas Feb 23, 2025
d3a46d6
wip
aignas Feb 23, 2025
5fbd827
wip
aignas Feb 23, 2025
58c7532
sorting
aignas Feb 23, 2025
ef1cacc
wip
aignas Feb 23, 2025
67f12e8
reorder and clean up
aignas Feb 23, 2025
72fdac6
wip
aignas Feb 23, 2025
18fe4cc
use starlark to eval markers
aignas Feb 23, 2025
58755bd
cleanup dead code
aignas Feb 23, 2025
b4fe670
add bzl libs
aignas Feb 23, 2025
e048635
temp addition of the module lock file
aignas Feb 23, 2025
05a4612
Revert "add bzl libs"
aignas Feb 23, 2025
1b11c12
Revert "cleanup dead code"
aignas Feb 23, 2025
301f99f
Revert "use starlark to eval markers"
aignas Feb 23, 2025
49433ba
wip
aignas Feb 23, 2025
458cb45
Reapply "use starlark to eval markers"
aignas Feb 23, 2025
226344f
Reapply "cleanup dead code"
aignas Feb 23, 2025
f684759
Reapply "add bzl libs"
aignas Feb 23, 2025
fe5d7fc
no python usage for req parsing
aignas Feb 23, 2025
ca2527e
fully move the parsing of Requires-Dist to starlark
aignas Feb 23, 2025
62df3ac
remove the MODULE.bazel.lock
aignas Feb 23, 2025
c5ff77a
simplify
aignas Feb 23, 2025
b92a67d
wip
aignas Feb 23, 2025
2049afc
Merge branch 'main' into feat/starlark-env-marker-and-metadata-parsing
aignas Mar 23, 2025
6177760
add notes
aignas Mar 23, 2025
96f65e0
pass the version
aignas Mar 23, 2025
7f99cb7
move the dep parsing to the analysis phase
aignas Mar 23, 2025
1de8269
wip
aignas Mar 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion python/private/pypi/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ bzl_library(
name = "evaluate_markers_bzl",
srcs = ["evaluate_markers.bzl"],
deps = [
":pypi_repo_utils_bzl",
":pep508_env_bzl",
":pep508_evaluate_bzl",
":pep508_req_bzl",
],
)

Expand Down Expand Up @@ -208,6 +210,33 @@ bzl_library(
],
)

# Starlark library target wrapping pep508_evaluate.bzl; it lists the enum and
# semver helper libraries from //python/private as its deps.
bzl_library(
name = "pep508_evaluate_bzl",
srcs = ["pep508_evaluate.bzl"],
deps = [
"//python/private:enum_bzl",
"//python/private:semver_bzl",
],
)

# Starlark library target wrapping pep508_req.bzl; its only listed dep is the
# normalize_name helper library from //python/private.
bzl_library(
name = "pep508_req_bzl",
srcs = ["pep508_req.bzl"],
deps = [
"//python/private:normalize_name_bzl",
],
)

# Starlark library target wrapping pep508_env.bzl; it depends on the sibling
# pep508_evaluate and pep508_req libraries plus the normalize_name helper.
bzl_library(
name = "pep508_env_bzl",
srcs = ["pep508_env.bzl"],
deps = [
":pep508_evaluate_bzl",
":pep508_req_bzl",
"//python/private:normalize_name_bzl",
],
)

bzl_library(
name = "pip_bzl",
srcs = ["pip.bzl"],
Expand Down
67 changes: 13 additions & 54 deletions python/private/pypi/evaluate_markers.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -14,65 +14,24 @@

"""A simple function that evaluates markers using a python interpreter."""

load(":deps.bzl", "record_files")
load(":pypi_repo_utils.bzl", "pypi_repo_utils")
load(":pep508_env.bzl", "env", _platform_from_str = "platform_from_str")
load(":pep508_evaluate.bzl", "evaluate")
load(":pep508_req.bzl", _req = "requirement")

# Used as a default value in a rule to ensure we fetch the dependencies.
SRCS = [
# When the version, or any of the files in `packaging` package changes,
# this file will change as well.
record_files["pypi__packaging"],
Label("//python/private/pypi/requirements_parser:resolve_target_platforms.py"),
Label("//python/private/pypi/whl_installer:platform.py"),
]

def evaluate_markers(mrctx, *, requirements, python_interpreter, python_interpreter_target, srcs, logger = None):
def evaluate_markers(requirements):
"""Return the list of supported platforms per requirements line.

Args:
mrctx: repository_ctx or module_ctx.
requirements: list[str] of the requirement file lines to evaluate.
python_interpreter: str, path to the python_interpreter to use to
evaluate the env markers in the given requirements files. It will
be only called if the requirements files have env markers. This
should be something that is in your PATH or an absolute path.
python_interpreter_target: Label, same as python_interpreter, but in a
label format.
srcs: list[Label], the value of SRCS passed from the `rctx` or `mctx` to this function.
logger: repo_utils.logger or None, a simple struct to log diagnostic
messages. Defaults to None.
requirements: dict[str, list[str]] of the requirement file lines to evaluate.

Returns:
dict of string lists with target platforms
"""
if not requirements:
return {}

in_file = mrctx.path("requirements_with_markers.in.json")
out_file = mrctx.path("requirements_with_markers.out.json")
mrctx.file(in_file, json.encode(requirements))

pypi_repo_utils.execute_checked(
mrctx,
op = "ResolveRequirementEnvMarkers({})".format(in_file),
python = pypi_repo_utils.resolve_python_interpreter(
mrctx,
python_interpreter = python_interpreter,
python_interpreter_target = python_interpreter_target,
),
arguments = [
"-m",
"python.private.pypi.requirements_parser.resolve_target_platforms",
in_file,
out_file,
],
srcs = srcs,
environment = {
"PYTHONPATH": [
Label("@pypi__packaging//:BUILD.bazel"),
Label("//:BUILD.bazel"),
],
},
logger = logger,
)
return json.decode(mrctx.read(out_file))
ret = {}
for req_string, platforms in requirements.items():
req = _req(req_string)
for platform in platforms:
if evaluate(req.marker, env = env(_platform_from_str(platform, None))):
ret.setdefault(req_string, []).append(platform)

return ret
58 changes: 28 additions & 30 deletions python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@

load("@bazel_features//:features.bzl", "bazel_features")
load("@pythons_hub//:interpreters.bzl", "INTERPRETER_LABELS")
load("@pythons_hub//:versions.bzl", "MINOR_MAPPING")
load("//python/private:auth.bzl", "AUTH_ATTRS")
load("//python/private:full_version.bzl", "full_version")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:repo_utils.bzl", "repo_utils")
load("//python/private:semver.bzl", "semver")
load("//python/private:version_label.bzl", "version_label")
load(":attrs.bzl", "use_isolated")
load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS")
load(":evaluate_markers.bzl", "evaluate_markers")
load(":hub_repository.bzl", "hub_repository", "whl_config_settings_to_json")
load(":parse_requirements.bzl", "parse_requirements")
load(":parse_whl_name.bzl", "parse_whl_name")
Expand Down Expand Up @@ -166,31 +168,14 @@ def _create_whl_repos(
),
extra_pip_args = pip_attr.extra_pip_args,
get_index_urls = get_index_urls,
# NOTE @aignas 2024-08-02: we will execute any interpreter that we find either
# in the PATH or if specified as a label. We will configure the env
# markers when evaluating the requirement lines based on the output
# from the `requirements_files_by_platform` which should have something
# similar to:
# {
# "//:requirements.txt": ["cp311_linux_x86_64", ...]
# }
#
# We know the target python versions that we need to evaluate the
# markers for and thus we don't need to use multiple python interpreter
# instances to perform this manipulation. This function should be executed
# only once by the underlying code to minimize the overhead needed to
# spin up a Python interpreter.
evaluate_markers = lambda module_ctx, requirements: evaluate_markers(
module_ctx,
requirements = requirements,
python_interpreter = pip_attr.python_interpreter,
python_interpreter_target = python_interpreter_target,
srcs = pip_attr._evaluate_markers_srcs,
logger = logger,
),
# NOTE @aignas 2025-02-24: we will use the "cp3xx_os_arch" platform labels
# for converting to the PEP508 environment and will evaluate them in starlark
# without involving the interpreter at all.
evaluate_markers = evaluate_markers,
logger = logger,
)

platforms = {}
for whl_name, requirements in requirements_by_platform.items():
group_name = whl_group_mapping.get(whl_name)
group_deps = requirement_cycles.get(group_name, [])
Expand Down Expand Up @@ -251,6 +236,10 @@ def _create_whl_repos(
))

whl_libraries[repo_name] = args

# TODO @aignas 2025-03-23: make this more efficient
for p in args.pop("experimental_target_platforms", []):
platforms[p] = None
whl_map.setdefault(whl_name, {})[config_setting] = repo_name

return struct(
Expand All @@ -262,6 +251,7 @@ def _create_whl_repos(
},
extra_aliases = extra_aliases,
whl_libraries = whl_libraries,
platforms = platforms,
)

def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patterns, multiple_requirements_for_whl = False, python_version):
Expand Down Expand Up @@ -426,6 +416,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
exposed_packages = {}
extra_aliases = {}
whl_libraries = {}
platforms = {}

is_reproducible = True

Expand Down Expand Up @@ -503,6 +494,7 @@ You cannot use both the additive_build_content and additive_build_content_file a
extra_aliases[hub_name].setdefault(whl_name, {}).update(aliases)
exposed_packages.setdefault(hub_name, {}).update(out.exposed_packages)
whl_libraries.update(out.whl_libraries)
platforms.setdefault(hub_name, {}).update(out.platforms)

# TODO @aignas 2024-04-05: how do we support different requirement
# cycles for different abis/oses? For now we will need the users to
Expand Down Expand Up @@ -539,6 +531,17 @@ You cannot use both the additive_build_content and additive_build_content_file a
}
for hub_name, extra_whl_aliases in extra_aliases.items()
},
platforms = {
hub_name: sorted(p)
for hub_name, p in platforms.items()
},
python_versions = {
hub_name: sorted({
full_version(version = v, minor_mapping = MINOR_MAPPING): None
for v in m.python_versions
})
for hub_name, m in pip_hub_map.items()
},
whl_libraries = {
k: dict(sorted(args.items()))
for k, args in sorted(whl_libraries.items())
Expand Down Expand Up @@ -630,6 +633,8 @@ def _pip_impl(module_ctx):
for key, values in whl_map.items()
},
packages = mods.exposed_packages.get(hub_name, []),
python_versions = mods.python_versions[hub_name],
platforms = mods.platforms.get(hub_name, ["host"]),
groups = mods.hub_group_map.get(hub_name),
)

Expand Down Expand Up @@ -764,13 +769,6 @@ a corresponding `python.toolchain()` configured.
doc = """\
A dict of labels to wheel names that is typically generated by the whl_modifications.
The labels are JSON config files describing the modifications.
""",
),
"_evaluate_markers_srcs": attr.label_list(
default = EVALUATE_MARKERS_SRCS,
doc = """\
The list of labels to use as SRCS for the marker evaluation code. This ensures that the
code will be re-evaluated when any of files in the default changes.
""",
),
}, **ATTRS)
Expand Down
25 changes: 24 additions & 1 deletion python/private/pypi/generate_whl_library_build_bazel.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

"""Generate the BUILD.bazel contents for a repo defined by a whl_library."""

load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
load("//python/private:text_util.bzl", "render")

_RENDER = {
Expand All @@ -24,7 +25,11 @@ _RENDER = {
"dependencies": render.list,
"dependencies_by_platform": lambda x: render.dict(x, value_repr = render.list),
"entry_points": render.dict,
"extras": render.list,
"group_deps": render.list,
"host_python_version": str,
"platforms": str,
"requires_dist": render.list,
"srcs_exclude": render.list,
"tags": render.list,
}
Expand All @@ -33,7 +38,7 @@ _RENDER = {
# this repository can be publicly visible without the need for
# export_files
_TEMPLATE = """\
load("@rules_python//python/private/pypi:whl_library_targets.bzl", "whl_library_targets")
{loads}

package(default_visibility = ["//visibility:public"])

Expand All @@ -58,6 +63,17 @@ def generate_whl_library_build_bazel(
"""

additional_content = []
loads = {
"@rules_python//python/private/pypi:whl_library_targets.bzl": ('"whl_library_targets"',),
}
if BZLMOD_ENABLED:
dep_template = kwargs["dep_template"]
loads[dep_template.format(
name = "",
target = "requirements.bzl",
)] = ("hub_settings = \"private\"",)
kwargs["platforms"] = "hub_settings.platforms"
kwargs["host_python_version"] = "hub_settings.python_versions[0]"
if annotation:
kwargs["data"] = annotation.data
kwargs["copy_files"] = annotation.copy_files
Expand All @@ -70,6 +86,13 @@ def generate_whl_library_build_bazel(
contents = "\n".join(
[
_TEMPLATE.format(
loads = "\n".join([
"load({}, {})".format(
repr(path),
", ".join([s for s in symbols]),
)
for path, symbols in loads.items()
]),
kwargs = render.indent("\n".join([
"{} = {},".format(k, _RENDER.get(k, repr)(v))
for k, v in sorted(kwargs.items())
Expand Down
26 changes: 26 additions & 0 deletions python/private/pypi/hub_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
""

load("//python/private:text_util.bzl", "render")
load(":parse_requirements.bzl", "host_platform")
load(":render_pkg_aliases.bzl", "render_multiplatform_pkg_aliases")
load(":whl_config_setting.bzl", "whl_config_setting")

Expand Down Expand Up @@ -44,6 +45,11 @@ def _impl(rctx):
# `requirement`, et al. macros.
macro_tmpl = "@@{name}//{{}}:{{}}".format(name = rctx.attr.name)

platforms = [
host_platform(rctx) if p == "host" else p
for p in rctx.attr.platforms
]

rctx.file("BUILD.bazel", _BUILD_FILE_CONTENTS)
rctx.template("requirements.bzl", rctx.attr._template, substitutions = {
"%%ALL_DATA_REQUIREMENTS%%": render.list([
Expand All @@ -59,6 +65,8 @@ def _impl(rctx):
for p in bzl_packages
}),
"%%MACRO_TMPL%%": macro_tmpl,
"%%PLATFORMS%%": render.indent(render.list(sorted(platforms))).lstrip(),
"%%PYTHON_VERSIONS%%": render.indent(render.list(sorted(rctx.attr.python_versions))).lstrip(),
})

hub_repository = repository_rule(
Expand All @@ -74,6 +82,24 @@ hub_repository = repository_rule(
mandatory = False,
doc = """\
The list of packages that will be exposed via all_*requirements macros. Defaults to whl_map keys.
""",
),
# TODO @aignas 2025-03-23: get the `platforms` and `python_versions`
# from the aliases? This requires us to only create aliases for the
# target platforms and python_versions we care about. Python versions
# probably still need to be passed in.
"platforms": attr.string_list(
mandatory = True,
doc = """\
The list of target platforms that are supported in this hub repository. This
can contain 'abi_os_arch' tuples or 'host' to keep the lock files os/arch
agnostic.
""",
),
"python_versions": attr.string_list(
mandatory = True,
doc = """\
The list of python versions that are supported in this hub repository.
""",
),
"repo_name": attr.string(
Expand Down
Loading