@@ -659,54 +659,6 @@ def create_full_scan_with_report_url(
659659 # Return result in the format expected by the user
660660 return diff
661661
def check_full_scans_status(
    self,
    head_full_scan_id: str,
    new_full_scan_id: str,
    poll_interval: float = 1.0,
) -> bool:
    """Wait until both full scans report the ``resolve`` scan state.

    Each scan is first requested once through ``sdk.fullscans.stream`` with a
    0.5 second SDK timeout; any exception from that request is logged at
    debug level and otherwise ignored. The scan metadata is then polled until
    both scans have reported ``scan_state == "resolve"`` or
    ``self.config.timeout`` seconds have elapsed.

    Args:
        head_full_scan_id: ID of the head (baseline) full scan.
        new_full_scan_id: ID of the newly created full scan.
        poll_interval: Seconds to wait between metadata polls. Values below
            zero are treated as zero.

    Returns:
        True if both scans reached the ``resolve`` state before the timeout,
        False otherwise.
    """
    org_slug = self.config.org_slug

    # Temporarily shorten the SDK timeout for the stream requests and always
    # restore it, even if something other than Exception propagates.
    current_timeout = self.config.timeout
    self.sdk.set_timeout(0.5)
    try:
        for scan_id in (head_full_scan_id, new_full_scan_id):
            try:
                self.sdk.fullscans.stream(org_slug, scan_id)
            except Exception:
                log.debug(f"Queued up full scan for processing ({scan_id})")
    finally:
        self.sdk.set_timeout(current_timeout)

    def _is_resolved(scan_id: str) -> bool:
        # Missing/empty metadata counts as "not ready yet".
        metadata = self.sdk.fullscans.metadata(org_slug, scan_id)
        return bool(metadata) and metadata.get("scan_state") == "resolve"

    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    start_check = time.monotonic()
    head_is_ready = False
    new_is_ready = False
    is_ready = False
    while True:
        # Readiness is latched: a scan seen as resolved is not queried again.
        head_is_ready = head_is_ready or _is_resolved(head_full_scan_id)
        new_is_ready = new_is_ready or _is_resolved(new_full_scan_id)
        if head_is_ready and new_is_ready:
            is_ready = True
            break

        elapsed = time.monotonic() - start_check
        if elapsed >= self.config.timeout:
            log.debug(
                f"Timeout reached while waiting for full scans to be ready "
                f"({head_full_scan_id}, {new_full_scan_id})"
            )
            break

        # Pause between polls instead of busy-looping against the API, but
        # never sleep past the deadline.
        remaining = self.config.timeout - elapsed
        time.sleep(max(0.0, min(poll_interval, remaining)))

    total_time = time.monotonic() - start_check
    if is_ready:
        log.info(f"Full scans are ready in {total_time:.2f} seconds")
    else:
        log.warning(f"Full scans are not ready yet ({head_full_scan_id}, {new_full_scan_id})")
    return is_ready
709-
710662 def get_full_scan (self , full_scan_id : str ) -> FullScan :
711663 """
712664 Get a FullScan object for an existing full scan including sbom_artifacts and packages.
@@ -846,28 +798,54 @@ def update_package_values(pkg: Package) -> Package:
846798 pkg .url += f"/{ pkg .name } /overview/{ pkg .version } "
847799 return pkg
848800
def get_license_text_via_purl(self, packages: dict[str, Package], batch_size: int = 5000) -> dict:
    """Populate license attribution and details on packages via the PURL endpoint.

    The package keys are sent to ``self.sdk.purl.post`` in batches. Every
    returned result is mapped back to a key of the form
    ``{type}/{name}@{version}``; when that key exists in ``packages`` the
    package's ``licenseAttrib`` and ``licenseDetails`` attributes are set
    from the result. If the same key is returned more than once, the last
    result wins.

    Args:
        packages: Packages keyed by ``{type}/{name}@{version}``. The objects
            are updated in place.
        batch_size: Maximum number of packages per API call. Values outside
            1-9999 are clamped into that range.

    Returns:
        The same ``packages`` dictionary that was passed in.
    """
    # Clamp rather than reject, so a bad configuration value cannot produce a
    # zero/negative range step or an oversized request.
    batch_size = max(1, min(9999, batch_size))

    all_components = [{"purl": f"pkg:/{purl}"} for purl in packages]
    total_components = len(all_components)
    # Ceiling division; invariant across batches, so computed once.
    total_batches = (total_components + batch_size - 1) // batch_size
    log.debug(f"Processing {total_components} packages in batches of {batch_size}")

    batch_starts = range(0, total_components, batch_size)
    for batch_num, start in enumerate(batch_starts, start=1):
        batch_components = all_components[start:start + batch_size]
        log.debug(f"Processing batch {batch_num}/{total_batches} ({len(batch_components)} packages)")

        results = self.sdk.purl.post(
            license=True,
            components=batch_components,
            licenseattrib=True,
            licensedetails=True,
        )

        # NOTE(review): what the SDK returns on failure is not visible here;
        # a falsy response is treated as "no results" instead of crashing.
        for result in results or ():
            purl = f"{result['type']}/{result['name']}@{result['version']}"
            package = packages.get(purl)
            if package is None:
                # Result for something we did not ask about (or a key that
                # does not round-trip); nothing to update.
                continue
            package.licenseAttrib = result.get("licenseAttrib")
            package.licenseDetails = result.get("licenseDetails")

    return packages
872850
873851 def get_added_and_removed_packages (
@@ -960,7 +938,14 @@ def get_added_and_removed_packages(
960938 log .error (f"Artifact details - name: { artifact .name } , version: { artifact .version } " )
961939 log .error ("No matching packages found in head_full_scan" )
962940
963- packages = self .get_license_text_via_purl (packages )
941+ # Only fetch license details if generate_license is enabled
942+ if self .cli_config and self .cli_config .generate_license :
943+ log .debug ("Fetching license details via PURL endpoint" )
944+ batch_size = self .cli_config .max_purl_batch_size if self .cli_config else 5000
945+ packages = self .get_license_text_via_purl (packages , batch_size = batch_size )
946+ else :
947+ log .debug ("Skipping PURL endpoint call (--generate-license not set)" )
948+
964949 return added_packages , removed_packages , packages
965950
966951 def create_new_diff (
@@ -1092,9 +1077,6 @@ def create_new_diff(
10921077 log .warning (f"Failed to clean up temporary file { temp_file } : { e } " )
10931078
10941079 # Handle diff generation - now we always have both scans
1095- scans_ready = self .check_full_scans_status (head_full_scan_id , new_full_scan .id )
1096- if scans_ready is False :
1097- log .error (f"Full scans did not complete within { self .config .timeout } seconds" )
10981080 (
10991081 added_packages ,
11001082 removed_packages ,
0 commit comments