26
26
from macaron .json_tools import json_extract
27
27
from macaron .malware_analyzer .datetime_parser import parse_datetime
28
28
from macaron .slsa_analyzer .package_registry .package_registry import PackageRegistry
29
- from macaron .util import download_file_with_size_limit , send_get_http_raw , stream_file_with_size_limit
29
+ from macaron .util import download_file_with_size_limit , send_get_http_raw , stream_file_with_size_limit , send_head_http_raw
30
30
31
31
if TYPE_CHECKING :
32
32
from macaron .slsa_analyzer .specs .package_registry_spec import PackageRegistryInfo
@@ -443,6 +443,33 @@ def extract_attestation(attestation_data: dict) -> dict | None:
443
443
return attestations [0 ]
444
444
445
445
446
# URL layout used by the PyPI inspector service; see line 125 of
# https://github.com/pypi/inspector/blob/main/inspector/main.py
# The {first}/{second}/{rest} placeholders are filled from slices of a
# distribution's blake2b_256 digest by the code that formats this template.
INSPECTOR_TEMPLATE = (
    "{inspector_url_scheme}://{inspector_url_netloc}"
    "/project/{name}/{version}"
    "/packages/{first}/{second}/{rest}/{filename}"
)
451
+
452
+
453
@dataclass
class PyPIInspectorAsset:
    """The PyPI inspector links recorded for a package.

    An instance is truthy only once it holds at least one link (sdist or
    wheel) together with at least one reachability entry.
    """

    #: the pypi inspector link to the tarball
    package_sdist_link: str

    #: the pypi inspector link(s) to the wheel(s)
    package_whl_links: list[str]

    #: a mapping of inspector links to whether they are reachable
    package_link_reachability: dict[str, bool]

    def __bool__(self) -> bool:
        """Return True when this inspector object is populated, False when it is empty."""
        has_link = bool(self.package_sdist_link) or bool(self.package_whl_links)
        return has_link and bool(self.package_link_reachability)
471
+
472
+
446
473
@dataclass
447
474
class PyPIPackageJsonAsset :
448
475
"""The package JSON hosted on the PyPI registry."""
@@ -465,6 +492,9 @@ class PyPIPackageJsonAsset:
465
492
#: the source code temporary location name
466
493
package_sourcecode_path : str
467
494
495
+ #: the pypi inspector information about this package
496
+ inspector_asset : PyPIInspectorAsset
497
+
468
498
#: The size of the asset (in bytes). This attribute is added to match the AssetLocator
469
499
#: protocol and is not used because pypi API registry does not provide it.
470
500
@property
@@ -718,6 +748,91 @@ def get_sha256(self) -> str | None:
718
748
logger .debug ("Found sha256 hash: %s" , artifact_hash )
719
749
return artifact_hash
720
750
751
def get_inspector_links(self) -> bool:
    """Generate PyPI inspector links for this package version's distributions and fill in the inspector asset.

    The package metadata is downloaded first if it is not already present. The
    version used is ``component_version`` or, when that is None, the latest
    version. For every ``sdist`` or ``bdist_wheel`` distribution of that
    version a link is built from ``INSPECTOR_TEMPLATE`` and probed with a HEAD
    request; the link and the truthiness of the probe result are stored on
    ``self.inspector_asset``.

    Returns
    -------
    bool
        True if the link generation was successful, False otherwise.
    """
    # A populated asset means links were generated by an earlier call.
    if self.inspector_asset:
        return True

    if not self.package_json and not self.download(""):
        logger.warning("No package metadata available, cannot get links")
        return False

    releases = self.get_releases()
    if releases is None:
        logger.warning("Package has no releases, cannot create inspector links.")
        return False

    version = self.component_version
    if version is None:
        version = self.get_latest_version()

    if version is None:
        logger.warning("No version set, and no latest version exists. cannot create inspector links.")
        return False

    distributions = json_extract(releases, [version], list)

    if not distributions:
        logger.warning(
            "Package has no distributions for release version %s. Cannot create inspector links.", version
        )
        return False

    # The project name comes from the package-level info, not from a
    # distribution, so look it up once instead of once per distribution.
    name = json_extract(self.package_json, ["info", "name"], str)
    if name is None:
        logger.warning("The package metadata has no 'name' field. Cannot create inspector links.")
        return False

    for distribution in distributions:
        package_type = json_extract(distribution, ["packagetype"], str)
        if package_type is None:
            logger.warning("The version %s has no 'package type' field in a distribution", version)
            continue

        # Only sdists and wheels are recorded on the asset; reject anything
        # else before building a link so no HEAD request is wasted on it.
        if package_type not in ("sdist", "bdist_wheel"):
            logger.debug("Unknown package distribution type: %s", package_type)
            continue

        blake2b_256 = json_extract(distribution, ["digests", "blake2b_256"], str)
        if blake2b_256 is None:
            logger.warning("The version %s has no 'blake2b_256' field in a distribution", version)
            continue

        filename = json_extract(distribution, ["filename"], str)
        if filename is None:
            logger.warning("The version %s has no 'filename' field in a distribution", version)
            continue

        link = INSPECTOR_TEMPLATE.format(
            inspector_url_scheme=self.pypi_registry.inspector_url_scheme,
            inspector_url_netloc=self.pypi_registry.inspector_url_netloc,
            name=name,
            version=version,
            first=blake2b_256[0:2],
            second=blake2b_256[2:4],
            rest=blake2b_256[4:],
            filename=filename,
        )

        # use a head request because we don't care about the response contents
        reachable = bool(send_head_http_raw(link))

        if package_type == "sdist":
            self.inspector_asset.package_sdist_link = link
        else:
            self.inspector_asset.package_whl_links.append(link)
        self.inspector_asset.package_link_reachability[link] = reachable

    # Falsy when every distribution was skipped and nothing was recorded.
    return bool(self.inspector_asset)
835
+
721
836
722
837
def find_or_create_pypi_asset (
723
838
asset_name : str , asset_version : str | None , pypi_registry_info : PackageRegistryInfo
@@ -755,6 +870,8 @@ def find_or_create_pypi_asset(
755
870
logger .debug ("Failed to create PyPIPackageJson asset." )
756
871
return None
757
872
758
- asset = PyPIPackageJsonAsset (asset_name , asset_version , False , package_registry , {}, "" )
873
+ asset = PyPIPackageJsonAsset (
874
+ asset_name , asset_version , False , package_registry , {}, "" , PyPIInspectorAsset ("" , [], {})
875
+ )
759
876
pypi_registry_info .metadata .append (asset )
760
877
return asset
0 commit comments