@@ -113,9 +113,13 @@ class CommitInfo(NamedTuple):
 
 def get_file_names(
     cwd: Optional[str] = None,
+    path_filter: Optional[str] = None,
 ) -> List[CommitInfo]:
+    cmd = "git log --date=short --pretty='format:%h;%ad' --numstat"
+    if path_filter:
+        cmd += f" -- {path_filter}"
     lines = run_command(
-        "git log --date=short --pretty='format:%h;%ad' --numstat",
+        cmd,
         cwd=cwd,
         env={"TZ": "UTC"},
     ).split("\n")
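Context for this hunk: appending "-- <path>" to git log uses git's pathspec separator, which restricts the history to commits touching that path. Below is a rough sketch of how the "%h;%ad" header plus --numstat output produced by this command can be parsed; the CommitInfo fields and parsing details are assumptions for illustration, not this repo's actual implementation:

from typing import List, NamedTuple

# Assumed shape; the real CommitInfo in this file may differ.
class CommitInfo(NamedTuple):
    commit_id: str
    date: str
    filename: str
    lines_added: int
    lines_deleted: int

def parse_numstat(output: str) -> List[CommitInfo]:
    """Parse `git log --pretty='format:%h;%ad' --numstat` output."""
    commits: List[CommitInfo] = []
    commit_id = date = ""
    for line in output.split("\n"):
        if "\t" in line:
            # numstat line: "<added>\t<deleted>\t<path>"; binary files
            # report "-" for both counts, treated as 0 here.
            added, deleted, path = line.split("\t")
            commits.append(CommitInfo(
                commit_id,
                date,
                path,
                int(added) if added != "-" else 0,
                int(deleted) if deleted != "-" else 0,
            ))
        elif ";" in line:
            # commit header line produced by --pretty='format:%h;%ad'
            commit_id, date = line.split(";", 1)
    return commits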
@@ -191,13 +195,22 @@ def conv_to_csv(json_data: List[Dict[str, Any]]) -> io.StringIO:
 
 
 def main() -> None:
+    # Process the tutorials repo
+    print("Processing tutorials repo")
     tutorials_dir = os.path.expanduser("./tutorials")
-    get_history_log = get_history(tutorials_dir)
-    commits_to_files = get_file_names(tutorials_dir)
+    tutorials_history_log = get_history(tutorials_dir)
+    tutorials_commits_to_files = get_file_names(tutorials_dir)
+
+    # Process the pytorch/docs dir
+    print("Processing pytorch/docs dir")
+    pytorch_docs_dir = os.path.expanduser("./pytorch/docs")
+    pytorch_docs_history_log = get_history(pytorch_docs_dir)
+    pytorch_docs_commits_to_files = get_file_names(
+        os.path.expanduser("./pytorch"), "docs"
+    )
 
-    # Upload data to S3 as csv with gzip compression and no header line
+    # Combine the two histories
 
-    print(f"Uploading data to {METADATA_PATH}")
     history_log = [
         {
             "commit_id": i[0],
@@ -207,21 +220,54 @@ def main() -> None:
             "number_of_changed_files": int(i[4]),
             "lines_added": int(i[5]),
             "lines_deleted": int(i[6]),
+            "repo": "tutorials",
         }
-        for i in get_history_log
+        for i in tutorials_history_log
     ]
+
+    history_log.extend(
+        [
+            {
+                "commit_id": i[0],
+                "author": i[1],
+                "date": i[2],
+                "title": i[3],
+                "number_of_changed_files": int(i[4]),
+                "lines_added": int(i[5]),
+                "lines_deleted": int(i[6]),
+                "repo": "pytorch",
+            }
+            for i in pytorch_docs_history_log
+        ]
+    )
+
+    # Combine the two commits to files
+
+    filenames = []
+    for entry in tutorials_commits_to_files:
+        items = convert_to_dict(entry)
+        for item in items:
+            item["filename"] = f"tutorials/{item['filename']}"
+        filenames.extend(items)
+
+    for entry in pytorch_docs_commits_to_files:
+        items = convert_to_dict(entry)
+        for item in items:
+            item["filename"] = f"pytorch/{item['filename']}"
+        filenames.extend(items)
+
+    # Upload data to S3 as csv with gzip compression and no header line
+
+    print(f"Uploading data to {METADATA_PATH}")
     upload_to_s3(
         "ossci-raw-job-status",
         f"{METADATA_PATH}",
         history_log,
     )
     print(f"Finished uploading data to {METADATA_PATH}")
 
+    # Upload filenames to S3
     print(f"Uploading data to {FILENAMES_PATH}")
-    filenames = []
-    for entry in commits_to_files:
-        items = convert_to_dict(entry)
-        filenames.extend(items)
     upload_to_s3(
         "ossci-raw-job-status",
         f"{FILENAMES_PATH}",