@@ -70,7 +70,13 @@ def get_repo_owner_and_name(repo_http_url):
70
70
# The first group contains the owner of the github repo extracted from the url
71
71
# The second group contains the name of the github repo extracted from the url
72
72
# 'But what is a regular expression?' ----> https://docs.python.org/3/howto/regex.html
73
- regex = r"https?:\/\/github\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
73
+ if 'github' in repo_http_url :
74
+ regex = r"https?:\/\/github\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
75
+ elif 'gitlab' in repo_http_url :
76
+ regex = r"https?:\/\/gitlab\.com\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
77
+ elif 'bitbucket' in repo_http_url :
78
+ regex = r"https?:\/\/bitbucket\.org\/([A-Za-z0-9 \- _]+)\/([A-Za-z0-9 \- _ \.]+)(.git)?\/?$"
79
+
74
80
result = re .search (regex , repo_http_url )
75
81
76
82
if not result :
@@ -86,7 +92,7 @@ def get_repo_owner_and_name(repo_http_url):
86
92
87
93
88
94
class IndexGenerator :
89
- def __init__ (self , agency : str , version : str , token : Optional [str ] = None ,):
95
+ def __init__ (self , agency : str , version : str , token : Optional [str ] = None , bitbucket_user : Optional [ str ] = None , bitbucket_password : Optional [ str ] = None , gitlab_token : Optional [ str ] = None ):
90
96
91
97
# user can change agency and version depending on parameters
92
98
self .index = {
@@ -99,6 +105,9 @@ def __init__(self, agency: str, version: str, token: Optional[str] = None,):
99
105
}
100
106
101
107
self .token = token
108
+ self .gitlab_token = gitlab_token
109
+ self .bitbucket_user = bitbucket_user
110
+ self .bitbucket_password = bitbucket_password
102
111
103
112
def get_code_json_github (self ,repo : str ) -> Optional [Dict ]:
104
113
try :
@@ -116,14 +125,45 @@ def get_code_json_github(self,repo : str) -> Optional[Dict]:
116
125
print (f"JSON Error: { str (e )} " )
117
126
return None
118
127
119
- def get_code_json_other (self ,repo : str ) -> Optional [Dict ]:
120
- return None
128
def get_code_json_gitlab(self, repo: str) -> Optional[Dict]:
    """Fetch and decode the ``code.json`` file of a gitlab.com repository.

    The file is requested through the GitLab "repository files" API at the
    ``HEAD`` ref, then its ``content`` field is base64-decoded and parsed
    as JSON.

    Args:
        repo: HTTP(S) URL of the repository on gitlab.com.

    Returns:
        The parsed ``code.json`` content, or ``None`` when the API call
        fails or the response cannot be decoded.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        # The API addresses a project by its URL-encoded "owner/name" path,
        # hence the literal %2F between the two parts.
        code_json_endpoint = (
            f"https://gitlab.com/api/v4/projects/{owner}%2F{name}"
            "/repository/files/code.json?ref=HEAD"
        )
        content_dict = hit_endpoint(code_json_endpoint, self.gitlab_token)
    except Exception as e:
        # Include the cause so a failed lookup can actually be diagnosed.
        print(f"Problem querying the Gitlab API: {e}")
        return None

    try:
        # NOTE(review): assumes hit_endpoint returns the decoded JSON body as
        # a dict -- confirm. KeyError/TypeError cover an error payload without
        # a 'content' field or a None response; ValueError covers both
        # invalid base64 (binascii.Error) and invalid JSON (JSONDecodeError).
        decoded_content = base64.b64decode(content_dict['content'])
        return json.loads(decoded_content)
    except (KeyError, TypeError, ValueError) as e:
        print(f"JSON Error {e}")
        return None
143
+
144
def get_code_json_bitbucket(self, repo: str) -> Optional[Dict]:
    """Fetch and parse the ``code.json`` file of a bitbucket.org repository.

    The raw file is downloaded from the repository's ``HEAD`` over HTTPS,
    using HTTP basic auth when both ``self.bitbucket_user`` and
    ``self.bitbucket_password`` are set.

    Args:
        repo: HTTP(S) URL of the repository on bitbucket.org.

    Returns:
        The parsed ``code.json`` content, or ``None`` when the request
        fails, the server answers with an error status, or the body is not
        valid JSON.
    """
    try:
        owner, name = get_repo_owner_and_name(repo)
        code_json_endpoint = f"https://bitbucket.org/{owner}/{name}/raw/HEAD/code.json"

        # The context manager closes the session's connections on exit.
        with requests.Session() as session:
            # Only attach credentials when they were actually provided;
            # anonymous access is enough for public repositories.
            if self.bitbucket_user and self.bitbucket_password:
                session.auth = (self.bitbucket_user, self.bitbucket_password)

            # The earlier extra POST to http://bitbucket.org was dropped: it
            # sent the basic-auth credentials over plain HTTP and its result
            # was never used.
            response = session.get(code_json_endpoint)
            # Turn 4xx/5xx answers (e.g. no code.json in the repo) into an
            # exception so they are reported below instead of being parsed.
            response.raise_for_status()
    except Exception as e:
        print(f"Exception when querying bitbucket.org: {e}")
        return None

    try:
        return json.loads(response.text)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"JSON Error {e}")
        return None
121
157
122
158
def get_code_json(self, repo: str) -> Optional[Dict]:
    """Dispatch a ``code.json`` lookup to the handler for the repo's host.

    The hosting service is recognised from the URL's hostname, so a path
    segment that merely contains another service's name (for example
    ``https://gitlab.com/org/github-mirror``) does not misroute the call.

    Args:
        repo: URL of the repository.

    Returns:
        Whatever the matching ``get_code_json_*`` handler returns, or
        ``None`` when the host is not GitHub, GitLab, or Bitbucket.
    """
    from urllib.parse import urlparse

    try:
        host = urlparse(repo).hostname
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal).
        host = None

    # Without a parsable hostname (such as a URL lacking its scheme), fall
    # back to inspecting the whole string, as earlier versions did.
    target = host or repo

    if 'github' in target:
        return self.get_code_json_github(repo)
    if 'gitlab' in target:
        return self.get_code_json_gitlab(repo)
    if 'bitbucket' in target:
        return self.get_code_json_bitbucket(repo)
    return None
127
167
128
168
def save_code_json (self , repo : str , output_path : str ) -> Optional [str ]:
129
169
@@ -147,7 +187,7 @@ def update_index(self, index: Dict, code_json: Dict, org_name: str, repo_name: s
147
187
148
188
index ['releases' ].append (baseline )
149
189
150
- def get_org_repos (self , org_name : str ) -> list [Dict ]:
190
+ def get_github_org_repos (self , org_name : str ) -> list [Dict ]:
151
191
try :
152
192
org_endpoint = f"https://api.github.com/orgs/{ org_name } /repos"
153
193
print (f"\n Processing organization: { org_name } " )
@@ -162,34 +202,64 @@ def get_org_repos(self, org_name: str) -> list[Dict]:
162
202
except Exception as e :
163
203
raise e
164
204
165
- def save_organization_files (self , org_name : str , codeJSONPath ) -> None :
166
- raise NotImplementedError
205
def _enumerate_repo_orgs(self, org_name, repo_name, url, total_repos, codeJSONPath=None, add_to_index=True, repo_index=None):
    """Look up ``code.json`` for one repository and optionally index it.

    Args:
        org_name: Organization or group the repository belongs to.
        repo_name: Repository name, used in progress output and as the
            saved file's base name.
        url: Repository URL handed to ``get_code_json`` / ``save_code_json``.
        total_repos: Number of repositories being processed (progress only).
        codeJSONPath: When set, the lookup goes through ``save_code_json``
            with the target ``<codeJSONPath>/<repo_name>.json``.
        add_to_index: When true, a found ``code.json`` is passed to
            ``update_index``.
        repo_index: 1-based position of this repository in the run
            (progress only); shown as ``?`` when not supplied.
    """
    position = '?' if repo_index is None else repo_index
    print(f"\nChecking {repo_name} [{position}/{total_repos}]")

    if not codeJSONPath:
        code_json = self.get_code_json(url)
    else:
        repoPath = os.path.join(codeJSONPath, (repo_name + '.json'))
        # NOTE(review): save_code_json is annotated as returning
        # Optional[str], yet its result is handed to update_index below as
        # the code.json content -- confirm what it really returns.
        code_json = self.save_code_json(url, repoPath)

    if code_json and add_to_index:
        print(f"✅ Found code.json in {repo_name}")
        self.update_index(self.index, code_json, org_name, repo_name)
    elif not code_json:
        print(f"❌ No code.json found in {repo_name}")

def process_github_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Check every repository of a GitHub organization for ``code.json``.

    Args:
        org_name: GitHub organization name.
        add_to_index: Whether found ``code.json`` files are added to
            ``self.index``.
        codeJSONPath: Optional directory passed through to the per-repo
            step so each repository's file is saved there.

    Any exception is reported on stdout and ends processing of this
    organization; nothing is raised to the caller.
    """
    try:
        # Guard against a None result so len() and the loop stay valid.
        repos = self.get_github_org_repos(org_name) or []
        total_repos = len(repos)

        for position, repo in enumerate(repos, 1):
            self._enumerate_repo_orgs(
                org_name,
                repo['name'],
                repo['svn_url'],
                total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
                repo_index=position,
            )

    except Exception as e:
        print(f"Error processing organization {org_name}: {str(e)}")

def get_gitlab_org_repos(self, org_name: str) -> list[Dict]:
    """List the projects of a gitlab.com group.

    Args:
        org_name: Group path; nested groups use ``/`` separators, which are
            URL-encoded for the API call.

    Returns:
        The project list returned by the GitLab groups API, or an empty
        list when the query fails (the error is printed).
    """
    try:
        url_encoded_org_name = org_name.replace("/", "%2F")
        org_endpoint = f"https://gitlab.com/api/v4/groups/{url_encoded_org_name}/projects"

        # NOTE(review): the GitLab API paginates list endpoints; unless
        # hit_endpoint follows pages, this is only the first page -- confirm.
        repo_list = hit_endpoint(org_endpoint, self.gitlab_token)

        total_repos = len(repo_list)
        print(f"Found {total_repos} public repositories")

        # Return the projects themselves: callers iterate over them and
        # read each entry's 'name' and 'web_url'.
        return repo_list
    except Exception as e:
        print(f"Ran into Exception when querying Gitlab Repos in group {org_name}: {e}")
        return []

def process_gitlab_org_files(self, org_name: str, add_to_index=True, codeJSONPath=None) -> None:
    """Check every project of a GitLab group for ``code.json``.

    Args:
        org_name: GitLab group path.
        add_to_index: Whether found ``code.json`` files are added to
            ``self.index``.
        codeJSONPath: Optional directory passed through to the per-repo
            step so each project's file is saved there.

    Any exception is reported on stdout and ends processing of this
    group; nothing is raised to the caller.
    """
    try:
        # Guard against a None result so len() and the loop stay valid.
        repos = self.get_gitlab_org_repos(org_name) or []
        total_repos = len(repos)

        for position, repo in enumerate(repos, 1):
            self._enumerate_repo_orgs(
                org_name,
                repo['name'],
                repo['web_url'],
                total_repos,
                codeJSONPath=codeJSONPath,
                add_to_index=add_to_index,
                repo_index=position,
            )

    except Exception as e:
        print(f"Error processing organization {org_name}: {str(e)}")
190
260
191
261
def save_index (self , output_path : str ) -> None :
192
- # sorts index by organizaiton then by name
262
+ # sorts index by organization then by name
193
263
self .index ['releases' ].sort (key = lambda x : (x .get ('organization' , '' ), x .get ('name' , '' )))
194
264
195
265
with open (output_path , 'w' ) as f :
0 commit comments