11import json
2+ from json .decoder import JSONDecodeError
23import base64
3- import argparse
44import os
5+ import requests
6+ import re
57
8+ from time import sleep , mktime , gmtime , time , localtime
69from typing import Dict , Optional
7- from github import Github , Repository , GithubException , Organization
10+
RETRIES = 5  # Maximum number of HTTP attempts hit_endpoint makes per call.


def _rate_limit_delay(headers):
    """Return the seconds to wait before retrying, or None if not rate limited.

    Args:
        headers: Response headers (a mapping with a ``get`` method).

    Returns:
        A non-negative number of seconds, or ``None`` when the headers carry
        no usable rate-limit information or report remaining quota.
    """
    # An explicit Retry-After (in seconds) takes precedence over reset times.
    retry_after = headers.get("retry-after")
    if retry_after is not None:
        try:
            return max(0, int(retry_after))
        except ValueError:
            pass  # Not an integer; fall back to the reset headers below.

    # Remaining quota means the refusal has another cause (e.g. permissions).
    remaining = headers.get("x-ratelimit-remaining", headers.get("ratelimit-remaining"))
    if remaining is not None and str(remaining).strip() != "0":
        return None

    reset_at = headers.get("x-ratelimit-reset", headers.get("ratelimit-reset"))
    try:
        # The reset header is treated as epoch seconds; clamp so a reset time
        # already in the past never produces a negative sleep.
        return max(0, int(reset_at) - int(time()))
    except (TypeError, ValueError):
        return None


def hit_endpoint(url, token, method='GET'):
    """Call a JSON endpoint, retrying on rate limits and undecodable bodies.

    Args:
        url: Endpoint URL to request.
        token: API token sent as a bearer ``Authorization`` header. When
            falsy, no ``Authorization`` header is sent.
        method: HTTP verb passed to ``requests.request``.

    Returns:
        The decoded JSON body of the first 200 response, or ``{}`` when every
        attempt returned a 200 whose body could not be decoded as JSON.

    Raises:
        ConnectionError: If the response status is neither 200 nor a
            retryable rate limit, or the rate limit persists after
            ``RETRIES`` attempts.
    """
    headers = {"Authorization": f"bearer {token}"} if token else {}

    for attempt in range(1, RETRIES + 1):
        response = requests.request(method, url, headers=headers, timeout=10)
        status = response.status_code

        if status == 200:
            try:
                return json.loads(response.text)
            except JSONDecodeError:
                # Malformed body: spend one attempt and try again.
                continue

        if status not in (403, 429):
            print(status)
            raise ConnectionError(f"Request to {url} failed with HTTP status {status}")

        wait_in_seconds = _rate_limit_delay(response.headers)
        if wait_in_seconds is None:
            # Sleeping cannot help when there is nothing to wait for.
            raise ConnectionError(
                f"Request to {url} was refused with HTTP status {status} "
                "and no rate-limit reset information"
            )
        if attempt >= RETRIES:
            raise ConnectionError(
                f"Rate limit was reached and couldn't be rectified after {attempt} tries"
            )

        resume_at = localtime(time() + wait_in_seconds)
        print(f"Ran into rate limit sleeping for {url}!")
        print(
            f"sleeping until {resume_at.tm_hour}:{resume_at.tm_min:02d} ({wait_in_seconds} seconds)"
        )
        sleep(wait_in_seconds)

    return {}
56+
57+
def get_repo_owner_and_name(repo_http_url):
    """Extract the owner and repository name from a hosted-git web URL.

    Supports ``github.com``, ``gitlab.com`` and ``bitbucket.org`` URLs of the
    form ``http(s)://<host>/<owner>/<repo>`` with an optional ``.git`` suffix
    and an optional trailing slash.

    Args:
        repo_http_url: Web URL of the repository.

    Returns:
        Tuple of owner and repo (the ``.git`` suffix is not part of the repo
        name), or a tuple of None and None if the url is invalid or the host
        is not supported.
    """
    if not isinstance(repo_http_url, str):
        return None, None

    # (keyword looked for in the URL, regex-escaped host name); first hit wins.
    known_hosts = (
        ("github", r"github\.com"),
        ("gitlab", r"gitlab\.com"),
        ("bitbucket", r"bitbucket\.org"),
    )
    for keyword, host in known_hosts:
        if keyword in repo_http_url:
            break
    else:
        # Unknown host: report "invalid" instead of failing on an unset pattern.
        return None, None

    # Group 1 is the owner, group 2 the repository name. The name group is
    # non-greedy so that a trailing ".git" is matched by the optional suffix
    # rather than being absorbed into the name.
    pattern = rf"https?://{host}/([A-Za-z0-9\-_]+)/([A-Za-z0-9\-_.]+?)(?:\.git)?/?$"
    match = re.search(pattern, repo_http_url)
    if not match:
        return None, None

    return match.group(1), match.group(2)
90+
891
992
1093class IndexGenerator :
11- def __init__ (self , agency : str , verison : str , token : Optional [str ] = None ,):
12- self .github = Github (token ) if token else Github ()
94+ def __init__ (self , agency : str , version : str , token : Optional [str ] = None , bitbucket_user : Optional [str ] = None , bitbucket_password : Optional [str ] = None , gitlab_token : Optional [str ] = None ):
1395
14- # user can change agency and version depending on paramters
96+ # user can change agency and version depending on parameters
1597 self .index = {
1698 "agency" : agency ,
17- "version" : verison ,
99+ "version" : version ,
18100 "measurementType" : {
19101 "method" : "projects"
20102 },
21103 "releases" : []
22104 }
23105
24- def get_code_json (self , repo : Repository ) -> Optional [Dict ]:
106+ self .token = token
107+ self .gitlab_token = gitlab_token
108+ self .bitbucket_user = bitbucket_user
109+ self .bitbucket_password = bitbucket_password
110+
def get_code_json_github(self, repo: str) -> Optional[Dict]:
    """Fetch and decode ``code.json`` from a GitHub repository.

    Requests the repository's ``contents/code.json`` API endpoint using
    ``self.token`` and base64-decodes the ``content`` field of the reply.

    Args:
        repo: Web URL of the GitHub repository.

    Returns:
        The parsed ``code.json`` document, or ``None`` if the URL cannot be
        parsed, the request fails, or the payload is missing or malformed.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        if not owner or not name:
            # Avoid requesting a ".../repos/None/None/..." endpoint.
            print(f"GitHub Error: could not parse owner and name from {repo}")
            return None

        code_json_endpoint = f"https://api.github.com/repos/{owner}/{name}/contents/code.json"
        content_dict = hit_endpoint(code_json_endpoint, self.token)
    except Exception as e:
        print(f"GitHub Error: {e}")
        return None

    # The payload may be empty or not a mapping at all.
    if not isinstance(content_dict, dict) or 'content' not in content_dict:
        print("JSON Error: response did not contain a 'content' field")
        return None

    try:
        decoded_content = base64.b64decode(content_dict['content'])
        return json.loads(decoded_content)
    except (json.JSONDecodeError, ValueError, TypeError) as e:
        print(f"JSON Error: {str(e)}")
        return None
37126
38- def save_code_json (self , repo : Repository , output_path : str ) -> Optional [str ]:
def get_code_json_gitlab(self, repo: str) -> Optional[Dict]:
    """Fetch and decode ``code.json`` from a GitLab repository.

    Requests the project's ``repository/files/code.json`` API endpoint at
    ref ``HEAD`` using ``self.gitlab_token`` and base64-decodes the
    ``content`` field of the reply.

    Args:
        repo: Web URL of the GitLab repository.

    Returns:
        The parsed ``code.json`` document, or ``None`` if the URL cannot be
        parsed, the request fails, or the payload is missing or malformed.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        if not owner or not name:
            # Avoid requesting a ".../projects/None%2FNone/..." endpoint.
            print(f"Problem querying the Gitlab API: could not parse owner and name from {repo}")
            return None

        code_json_endpoint = f"https://gitlab.com/api/v4/projects/{owner}%2F{name}/repository/files/code.json?ref=HEAD"
        content_dict = hit_endpoint(code_json_endpoint, self.gitlab_token)
    except Exception as e:
        print(f"Problem querying the Gitlab API: {e}")
        return None

    # The payload may be empty or not a mapping at all.
    if not isinstance(content_dict, dict) or 'content' not in content_dict:
        print("JSON Error response did not contain a 'content' field")
        return None

    try:
        decoded_content = base64.b64decode(content_dict['content'])
        return json.loads(decoded_content)
    except (json.JSONDecodeError, ValueError, TypeError) as e:
        print(f"JSON Error {e}")
        return None
142+
def get_code_json_bitbucket(self, repo: str) -> Optional[Dict]:
    """Fetch and parse ``code.json`` from a Bitbucket repository.

    Downloads the raw file at ``HEAD`` from bitbucket.org. Basic auth is
    attached only when both ``self.bitbucket_user`` and
    ``self.bitbucket_password`` are set.

    Args:
        repo: Web URL of the Bitbucket repository.

    Returns:
        The parsed ``code.json`` document, or ``None`` if the URL cannot be
        parsed, the request fails, the status is not 200, or the body is not
        valid JSON.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        if not owner or not name:
            print(f"Exception when querying bitbucket.org: could not parse owner and name from {repo}")
            return None

        code_json_endpoint = f"https://bitbucket.org/{owner}/{name}/raw/HEAD/code.json"

        # The session is closed on exit. No preliminary login request is
        # made: session-level basic auth is sent with each request, and
        # posting credentials to the plain-HTTP site root would expose them.
        with requests.Session() as session:
            if self.bitbucket_user and self.bitbucket_password:
                session.auth = (self.bitbucket_user, self.bitbucket_password)
            response = session.get(code_json_endpoint, timeout=10)
    except Exception as e:
        print(f"Exception when querying bitbucket.org: {e}")
        return None

    if response.status_code != 200:
        # A non-200 body (e.g. an HTML error page) is not a code.json file.
        print(f"Exception when querying bitbucket.org: HTTP status {response.status_code}")
        return None

    try:
        return json.loads(response.text)
    except ValueError as e:
        print(f"JSON Error: {e}")
        return None
156+
def get_code_json(self, repo: str) -> Optional[Dict]:
    """Fetch ``code.json`` for *repo* using the host-specific fetcher.

    Args:
        repo: Web URL of the repository.

    Returns:
        Whatever the matching host fetcher returns, or ``None`` when the URL
        mentions none of the supported hosts.
    """
    # Checked in this order; the first keyword contained in the URL wins.
    fetchers_by_keyword = (
        ('github', 'get_code_json_github'),
        ('gitlab', 'get_code_json_gitlab'),
        ('bitbucket', 'get_code_json_bitbucket'),
    )
    for keyword, fetcher_name in fetchers_by_keyword:
        if keyword in repo:
            fetch = getattr(self, fetcher_name)
            return fetch(repo)

    return None
166+
167+ def save_code_json (self , repo : str , output_path : str ) -> Optional [str ]:
39168
40169 res = self .get_code_json (repo )
41170
@@ -57,46 +186,78 @@ def update_index(self, index: Dict, code_json: Dict, org_name: str, repo_name: s
57186
58187 index ['releases' ].append (baseline )
59188
def get_github_org_repos(self, org_name: str) -> list[Dict]:
    """List the repositories of a GitHub organization.

    Walks every page of the organization's ``/repos`` endpoint so that
    organizations with more repositories than fit on one page are returned
    in full.

    Args:
        org_name: GitHub organization login.

    Returns:
        A list of repository dicts as returned by the GitHub API (empty if
        the API returned no list).

    Raises:
        Exception: Whatever ``hit_endpoint`` raises is propagated unchanged.
    """
    print(f"\nProcessing organization: {org_name}")

    per_page = 100  # Largest page size, to keep the number of requests low.
    repo_list = []
    page = 1
    while True:
        org_endpoint = (
            f"https://api.github.com/orgs/{org_name}/repos"
            f"?per_page={per_page}&page={page}"
        )
        batch = hit_endpoint(org_endpoint, self.token)

        # A non-list payload carries no repositories; stop paging.
        if not isinstance(batch, list):
            break

        repo_list.extend(batch)

        # A short page is the last page.
        if len(batch) < per_page:
            break
        page += 1

    total_repos = len(repo_list)
    print(f"Found {total_repos} public repositories")

    return repo_list
71203
72- def save_organization_files (self , org_name : str , codeJSONPath ) -> None :
73- raise NotImplementedError
204+ def _enumerate_repo_orgs (self ,id ,org_name ,repo_name , url , total_repos , codeJSONPath = None ,add_to_index = True ):
205+ print (f"\n Checking { repo_name } [{ id } /{ total_repos } ]" )
206+
207+ if not codeJSONPath :
208+ code_json = self .get_code_json (url )
209+ else :
210+ repoPath = os .path .join (codeJSONPath , (repo_name + '.json' ))
211+ code_json = self .save_code_json (url ,repoPath )
212+
213+ if code_json and add_to_index :
214+ print (f"✅ Found code.json in { repo_name } " )
215+ self .update_index (self .index , code_json , org_name , repo_name )
216+ elif not code_json :
217+ print (f"❌ No code.json found in { repo_name } " )
218+
def process_github_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Process the code.json of every repository in a GitHub organization.

    Failures are reported on stdout rather than raised: a failure to list
    the organization ends processing, and a failure on one repository does
    not stop the remaining repositories.

    Args:
        org_name: GitHub organization login.
        add_to_index: Forwarded to ``_enumerate_repo_orgs``.
        codeJSONPath: Forwarded to ``_enumerate_repo_orgs``.
    """
    try:
        repos = self.get_github_org_repos(org_name)
    except Exception as e:
        print(f"Error processing organization {org_name}: {str(e)}")
        return

    total_repos = len(repos)

    for position, repo in enumerate(repos, 1):
        try:
            # Prefer the repository's web URL; fall back to svn_url.
            repo_url = repo.get('html_url') or repo['svn_url']
            self._enumerate_repo_orgs(
                position,
                org_name,
                repo['name'],
                repo_url,
                total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
            )
        except Exception as e:
            print(f"Error processing repository [{position}/{total_repos}] in {org_name}: {e}")
230+
def get_gitlab_org_repos(self, org_name: str) -> list[Dict]:
    """List the projects of a GitLab group.

    Walks every page of the group's ``/projects`` endpoint so that groups
    with more projects than fit on one page are returned in full.

    Args:
        org_name: GitLab group path; ``/`` separators are percent-encoded
            before being placed in the endpoint URL.

    Returns:
        A list of project dicts as returned by the GitLab API, or an empty
        list if the query fails.
    """
    try:
        url_encoded_org_name = org_name.replace("/", "%2F")

        per_page = 100  # Largest page size, to keep the number of requests low.
        repo_list = []
        page = 1
        while True:
            org_endpoint = (
                f"https://gitlab.com/api/v4/groups/{url_encoded_org_name}/projects"
                f"?per_page={per_page}&page={page}"
            )
            batch = hit_endpoint(org_endpoint, self.gitlab_token)

            # A non-list payload carries no projects; stop paging.
            if not isinstance(batch, list):
                break

            repo_list.extend(batch)

            # A short page is the last page.
            if len(batch) < per_page:
                break
            page += 1

        total_repos = len(repo_list)
        print(f"Found {total_repos} public repositories")

        # Return the projects themselves; callers iterate over them.
        return repo_list
    except Exception as e:
        print(f"Ran into Exception when querying Gitlab Repos in group {org_name}: {e}")
        return []
74245
def process_gitlab_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Process the code.json of every project in a GitLab group.

    Failures are reported on stdout rather than raised: a failure to list
    the group ends processing, and a failure on one project does not stop
    the remaining projects.

    Args:
        org_name: GitLab group path.
        add_to_index: Forwarded to ``_enumerate_repo_orgs``.
        codeJSONPath: Forwarded to ``_enumerate_repo_orgs``.
    """
    try:
        repos = self.get_gitlab_org_repos(org_name)
    except Exception as e:
        print(f"Error processing organization {org_name}: {str(e)}")
        return

    # Guard against a result that is not a project list before iterating.
    if not isinstance(repos, list):
        print(f"Error processing organization {org_name}: no repository list was returned")
        return

    total_repos = len(repos)

    for position, repo in enumerate(repos, 1):
        try:
            self._enumerate_repo_orgs(
                position,
                org_name,
                repo['name'],
                repo['web_url'],
                total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
            )
        except Exception as e:
            # Isolate per-project failures so one bad project cannot abort
            # the rest of the group.
            print(f"Error processing repository [{position}/{total_repos}] in {org_name}: {e}")
97258
98259 def save_index (self , output_path : str ) -> None :
99- # sorts index by organizaiton then by name
260+ # sorts index by organization then by name
100261 self .index ['releases' ].sort (key = lambda x : (x .get ('organization' , '' ), x .get ('name' , '' )))
101262
102263 with open (output_path , 'w' ) as f :
0 commit comments