[PULP-946] Implement PEP 658 #1031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Status: Open. jobselko wants to merge 3 commits into pulp:main from jobselko:pep_658.
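For context, PEP 658 lets a simple index serve a distribution's core METADATA as a separate file at the distribution URL with a `.metadata` suffix, advertised on the file's anchor through the `data-core-metadata` attribute (called `data-dist-info-metadata` before PEP 714). The migration below backfills exactly those `{filename}.metadata` artifacts for existing content. The following minimal, self-contained sketch shows the consumer side of that convention; it is not code from this PR, and the package name, URL, and hash value are made up:

```python
# Hypothetical illustration of the PEP 658 convention this PR targets (not code
# from the PR): the metadata for a file is served at "<file URL>.metadata" and
# advertised via the data-core-metadata attribute on the file's anchor.
from html.parser import HTMLParser

SAMPLE_INDEX_HTML = """
<a href="https://example.org/packages/demo-1.0-py3-none-any.whl"
   data-core-metadata="sha256=abc123">demo-1.0-py3-none-any.whl</a>
"""


class MetadataLinkParser(HTMLParser):
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag != "a":
            return
        attrs = dict(attrs)
        meta = attrs.get("data-core-metadata") or attrs.get("data-dist-info-metadata")
        if meta:
            # PEP 658: the metadata file lives next to the distribution.
            self.links.append((attrs["href"] + ".metadata", meta))


parser = MetadataLinkParser()
parser.feed(SAMPLE_INDEX_HTML)
print(parser.links)
# [('https://example.org/packages/demo-1.0-py3-none-any.whl.metadata', 'sha256=abc123')]
```

Installers can follow such links to read dependency metadata without downloading the full wheel.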
pulp_python/app/migrations/0019_create_missing_metadata_artifacts.py (215 additions, 0 deletions)
```python
# Generated manually on 2025-12-15 14:00 for creating missing metadata artifacts

from django.db import migrations


def pulp_hashlib_new(name, *args, **kwargs):
    """
    Copied and updated (to comply with migrations) from pulpcore.
    """
    import hashlib as the_real_hashlib
    from django.conf import settings

    if name not in settings.ALLOWED_CONTENT_CHECKSUMS:
        return None

    return the_real_hashlib.new(name, *args, **kwargs)


def init_and_validate(file, artifact_model, expected_digests=None, expected_size=None):
    """
    Copied and updated (to comply with migrations) from pulpcore.
    """
    from django.conf import settings

    digest_fields = []
    for alg in ("sha512", "sha384", "sha256", "sha224", "sha1", "md5"):
        if alg in settings.ALLOWED_CONTENT_CHECKSUMS:
            digest_fields.append(alg)

    if isinstance(file, str):
        with open(file, "rb") as f:
            hashers = {
                n: hasher for n in digest_fields if (hasher := pulp_hashlib_new(n)) is not None
            }
            if not hashers:
                return None

            size = 0
            while True:
                chunk = f.read(1048576)  # 1 megabyte
                if not chunk:
                    break
                for algorithm in hashers.values():
                    algorithm.update(chunk)
                size = size + len(chunk)
    else:
        size = file.size
        hashers = file.hashers

    if expected_size:
        if size != expected_size:
            return None

    if expected_digests:
        for algorithm, expected_digest in expected_digests.items():
            if algorithm not in hashers:
                return None
            actual_digest = hashers[algorithm].hexdigest()
            if expected_digest != actual_digest:
                return None

    attributes = {"size": size, "file": file}
    for algorithm in digest_fields:
        attributes[algorithm] = hashers[algorithm].hexdigest()

    return artifact_model(**attributes)


def extract_wheel_metadata(filename):
    """
    Extract the metadata file content from a wheel file.
    Returns the raw metadata content as bytes, or None if metadata cannot be extracted.
    """
    import zipfile

    if not filename.endswith(".whl"):
        return None
    try:
        with zipfile.ZipFile(filename, "r") as f:
            for file_path in f.namelist():
                if file_path.endswith(".dist-info/METADATA"):
                    return f.read(file_path)
    except (zipfile.BadZipFile, KeyError, OSError):
        pass
    return None


def artifact_to_metadata_artifact(filename, artifact, tmp_dir, artifact_model):
    """
    Create an artifact for the metadata extracted from the provided wheel artifact.
    """
    import os
    import shutil
    import tempfile

    if not filename.endswith(".whl"):
        return None

    temp_wheel_path = None
    temp_metadata_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=filename, delete=False
        ) as temp_file:
            temp_wheel_path = temp_file.name
            artifact.file.seek(0)
            shutil.copyfileobj(artifact.file, temp_file)
            temp_file.flush()

        metadata_content = extract_wheel_metadata(temp_wheel_path)
        if not metadata_content:
            return None

        with tempfile.NamedTemporaryFile(
            "wb", dir=tmp_dir, suffix=".metadata", delete=False
        ) as temp_md:
            temp_metadata_path = temp_md.name
            temp_md.write(metadata_content)
            temp_md.flush()

        # todo: pass metadata_sha256 from PPC to expected_digests in init_and_validate?
        # if not, simplify init_and_validate
        metadata_artifact = init_and_validate(temp_metadata_path, artifact_model)
        if not metadata_artifact:
            return None

        try:
            metadata_artifact.save()
        except Exception:
            return None

        return metadata_artifact

    finally:
        if temp_wheel_path and os.path.exists(temp_wheel_path):
            os.unlink(temp_wheel_path)
        if temp_metadata_path and os.path.exists(temp_metadata_path):
            os.unlink(temp_metadata_path)


# todo: bulk create?
def create_missing_metadata_artifacts(apps, schema_editor):
    """
    Create metadata artifacts for PythonPackageContent instances that have metadata_sha256
    but are missing the corresponding metadata artifact.
    """
    import tempfile
    from django.conf import settings

    PythonPackageContent = apps.get_model("python", "PythonPackageContent")
    ContentArtifact = apps.get_model("core", "ContentArtifact")
    Artifact = apps.get_model("core", "Artifact")

    packages = (
        PythonPackageContent.objects.filter(metadata_sha256__isnull=False)
        .exclude(metadata_sha256="")
        .prefetch_related("contentartifact_set")
    )
    # todo: only for testing, remove later
    created_count = 0
    skipped_count = 0

    # todo: do i need temp dir? (not needed in localhost)
    with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
        for package in packages:
            metadata_relative_path = f"{package.filename}.metadata"
            content_artifacts = list(package.contentartifact_set.all())

            if any(ca.relative_path == metadata_relative_path for ca in content_artifacts):
                # Metadata artifact already exists
                continue

            main_content_artifact = next(
                (ca for ca in content_artifacts if ca.relative_path == package.filename),
                None,
            )
            if not main_content_artifact:
                # Main artifact does not exist
                skipped_count += 1
                continue

            metadata_artifact = artifact_to_metadata_artifact(
                package.filename, main_content_artifact.artifact, temp_dir, Artifact
            )
            if not metadata_artifact:
                # Failed to create metadata artifact
                skipped_count += 1
                continue

            try:
                ContentArtifact.objects.create(
                    artifact=metadata_artifact,
                    content=package,
                    relative_path=metadata_relative_path,
                )
                created_count += 1
            except Exception:
                # Failed to save metadata artifact
                skipped_count += 1

    print(f"Created {created_count} missing metadata artifacts. Skipped {skipped_count} packages.")


class Migration(migrations.Migration):

    dependencies = [
        ("python", "0018_packageprovenance"),
    ]

    operations = [
        migrations.RunPython(
            create_missing_metadata_artifacts,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
```
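To sanity-check the extraction step the migration relies on, the same `zipfile` logic can be exercised against a throwaway wheel. This is only an illustrative sketch: the wheel name and METADATA payload are invented, and the helper is copied inline so the snippet runs on its own rather than importing anything from the migration.

```python
# Standalone check of the wheel-metadata extraction step (sketch only; the
# wheel name and METADATA content below are made up).
import os
import tempfile
import zipfile


def extract_wheel_metadata(filename):
    """Same logic as the migration helper: return METADATA bytes or None."""
    if not filename.endswith(".whl"):
        return None
    try:
        with zipfile.ZipFile(filename, "r") as f:
            for file_path in f.namelist():
                if file_path.endswith(".dist-info/METADATA"):
                    return f.read(file_path)
    except (zipfile.BadZipFile, KeyError, OSError):
        pass
    return None


with tempfile.TemporaryDirectory() as tmp_dir:
    # Build a minimal wheel-like zip containing only the METADATA file.
    wheel_path = os.path.join(tmp_dir, "demo-1.0-py3-none-any.whl")
    with zipfile.ZipFile(wheel_path, "w") as whl:
        whl.writestr("demo-1.0.dist-info/METADATA", "Metadata-Version: 2.1\nName: demo\n")

    print(extract_wheel_metadata(wheel_path))
    # b'Metadata-Version: 2.1\nName: demo\n'
```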
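The TODO in `artifact_to_metadata_artifact` asks whether the stored `metadata_sha256` should be passed to `init_and_validate` as `expected_digests`. A small sketch of what that check would amount to is shown below; the METADATA payload and checksum are made up, and in the migration the checksum would come from `package.metadata_sha256`.

```python
# Sketch of the validation hinted at by the TODO: compare the digest of the
# extracted METADATA against the recorded metadata_sha256 before creating the
# artifact. Values here are made up for illustration.
import hashlib

metadata_content = b"Metadata-Version: 2.1\nName: demo\nVersion: 1.0\n"
stored_metadata_sha256 = hashlib.sha256(metadata_content).hexdigest()  # stands in for package.metadata_sha256

# Equivalent of calling:
#   init_and_validate(temp_metadata_path, Artifact,
#                     expected_digests={"sha256": stored_metadata_sha256})
# which returns None when the digests do not match.
assert hashlib.sha256(metadata_content).hexdigest() == stored_metadata_sha256
print("extracted metadata matches the recorded metadata_sha256")
```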