Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate a multi-level PyPI index as per PEP 503 #1

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions .github/workflows/pages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup Pages
uses: actions/configure-pages@v3
- uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Generate index
run: |
mkdir -p github-pages/simple
pip install pygithub
python generate_index.py > github-pages/simple/index.html
python -m venv venv
venv/bin/pip install -r ./requirements.txt
mkdir -p github-pages
venv/bin/python generate_index.py --url-path-prefix=/simple github-pages/simple
env:
GH_TOKEN: ${{ github.token }}
- name: Upload artifact
Expand Down
130 changes: 109 additions & 21 deletions generate_index.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,118 @@
from __future__ import annotations

import argparse
from collections import defaultdict
import json
import os
from pathlib import Path
import sys
from packaging.utils import parse_wheel_filename
from urllib.parse import urlparse
from textwrap import dedent

import github

##
## Output a PEP 503 compliant package repository for Pants wheels.
## See https://peps.python.org/pep-0503/
##


def main() -> str:
gh = github.Github(auth=github.Auth.Token(os.environ["GH_TOKEN"]))
def get_pants_python_packages(gh: github.Github) -> tuple[str, ...]:
repo = gh.get_repo("pantsbuild/pants")
releases = repo.get_releases()
index = "\n".join(
[
"<html>",
"<body>",
"<h1>Links for Pantsbuild Wheels</h1>",
*(
f'<a href="{asset.browser_download_url}">{asset.name}</a><br>'
for release in releases
if release.tag_name.startswith("release_2")
for asset in release.assets
if asset.name.endswith(".whl")
),
"</body>",
"</html>",
]
)
return index
all_releases = repo.get_releases()

pants_wheel_assets = [
asset
for release in all_releases
if release.tag_name.startswith("release_2")
for asset in release.assets
if asset.name.endswith(".whl")
]

packages = defaultdict(lambda: defaultdict(list))

for asset in pants_wheel_assets:
name, version, build_tag, tags = parse_wheel_filename(asset.name)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PEP 503 seems to suggest we MUST use the normalised names in various places. Does `parse_wheel_filename` guarantee that `name` is the normalised package name? Or are the wheel file names themselves guaranteed to be normalised?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

packages[name][version].append(asset)

return packages


def main(args):
parser = argparse.ArgumentParser()
parser.add_argument("--url-path-prefix", default="/", action="store")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My read of PEP 503 is that the project URLs must always be exactly /<project>/, i.e. customising this --url-path-prefix value may not work at all, so potentially it shouldn't be exposed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My read of PEP 503 is that the links are the full path with a trailing /. Checking PyPI itself shows that the links include the prefix, since they are of the form /simple/PKG_NAME/. (For example, run curl https://pypi.org/simple/ -o pypi.html and look at the links in the output.)

parser.add_argument("output_dir", action="store")
opts = parser.parse_args(args)

github_client = github.Github(auth=github.Auth.Token(os.environ["GH_TOKEN"]))
packages = get_pants_python_packages(github_client)
package_names = sorted(packages.keys())

prefix = opts.url_path_prefix
if prefix and prefix.endswith("/"):
prefix = prefix[0:-1]

output_dir = Path(opts.output_dir)
if output_dir.exists():
raise Exception(f"Output directory `{output_dir}` already exists.")
output_dir.mkdir(parents=True)

# http://repository.example.com/simple/
with open(output_dir / "index.html", "w") as f:
f.write(dedent(
"""\
<!DOCTYPE html>
<html>
<body>
<h1>Links for Pantsbuild Wheels</h1>
<ul>
"""
))
for package_name in package_names:
f.write(f"""<li><a href="{prefix}/{package_name}/">{package_name}</a></li>\n""")
f.write(dedent(
"""\
</ul>
</body>
</html>
"""
))

# http://repository.example.com/simple/PACKAGE_NAME/
for package_name in package_names:
package = packages[package_name]

package_output_dir = output_dir / package_name
package_output_dir.mkdir()

package_version_keys = sorted(package.keys())

with open(package_output_dir / "index.html", "w") as f:
f.write(dedent(
f"""\
<!DOCTYPE html>
<html>
<body>
<h1>Links for Pantsbuild Wheels - {package_name}</h1>
<ul>
"""
))

for package_version_key in package_version_keys:
package_version_assets = package[package_version_key]
package_version_assets.sort(key=lambda x: x.name)
for asset in package_version_assets:
f.write(f"""<li><a href="{asset.browser_download_url}">{asset.name}</a></li>\n""")

f.write(dedent(
"""\
</ul>
</body>
</html>
"""
))


if __name__ == "__main__":
print(main())
sys.exit(main(sys.argv[1:]))
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
packaging==24.2
pygithub==2.5.0
13 changes: 9 additions & 4 deletions test_generated_index.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#!/bin/bash

output_dir="$1"
if [ -z "${output_dir}" ]; then
echo "usage: $0 OUTPUT_DIRECTORY" 1>&2
exit 1
fi
if [ -e "${output_dir}" ]; then
echo "ERROR: Output directory exists. This script requires that it not exist already." 1>&2
exit 1
Expand All @@ -15,8 +19,9 @@ python3.9 -m venv "${venv_dir}"
"${venv_dir}/bin/pip" install -r ./requirements.txt

# Generate the Pants PyPI-compatible index.
mkdir -p "${output_dir}/public/simple"
"${venv_dir}/bin/python" ./generate_index.py > "${output_dir}/public/simple/index.html"
"${venv_dir}/bin/python" ./generate_index.py \
--url-path-prefix=/simple \
"${output_dir}/public/simple"

# Serve a copy of the generated index on port 8080.
python3.9 -m http.server -d "${output_dir}/public" --bind 127.0.0.1 8080 &
Expand All @@ -27,8 +32,8 @@ python3.9 -m http.server -d "${output_dir}/public" --bind 127.0.0.1 8080 &
pants_venv_dir="${output_dir}/pants-venv"
python3.9 -m venv "${pants_venv_dir}"
"${pants_venv_dir}/bin/pip" install -vv \
--extra-index-url=http://127.0.0.1:8080/simple/ \
pantsbuild.pants==2.18.0a0
--extra-index-url=http://127.0.0.1:8080/simple/ \
pantsbuild.pants==2.23.0

# Verify that the Pants console script is in the expected location.
if [ ! -f "${pants_venv_dir}/bin/pants" ]; then
Expand Down