Skip to content

Commit a193ddd

Browse files
authored
Expand script to collect dates for the pytorch/docs dir (#6534)
- Modify the script to also collect last_updated dates for the files under the docs directory of the pytorch repo.
1 parent 239b94f commit a193ddd

File tree

2 files changed

+62
-14
lines changed

2 files changed

+62
-14
lines changed

.github/scripts/get_tutorials_stats.py

+56-10
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,13 @@ class CommitInfo(NamedTuple):
113113

114114
def get_file_names(
115115
cwd: Optional[str] = None,
116+
path_filter: Optional[str] = None,
116117
) -> List[CommitInfo]:
118+
cmd = "git log --date=short --pretty='format:%h;%ad' --numstat"
119+
if path_filter:
120+
cmd += f" -- {path_filter}"
117121
lines = run_command(
118-
"git log --date=short --pretty='format:%h;%ad' --numstat",
122+
cmd,
119123
cwd=cwd,
120124
env={"TZ": "UTC"},
121125
).split("\n")
@@ -191,13 +195,22 @@ def conv_to_csv(json_data: List[Dict[str, Any]]) -> io.StringIO:
191195

192196

193197
def main() -> None:
198+
# Process the tutorials repo
199+
print("Processing tutorials repo")
194200
tutorials_dir = os.path.expanduser("./tutorials")
195-
get_history_log = get_history(tutorials_dir)
196-
commits_to_files = get_file_names(tutorials_dir)
201+
tutorials_history_log = get_history(tutorials_dir)
202+
tutorials_commits_to_files = get_file_names(tutorials_dir)
203+
204+
# Process the pytorch/docs dir
205+
print("Processing pytorch/docs dir")
206+
pytorch_docs_dir = os.path.expanduser("./pytorch/docs")
207+
pytorch_docs_history_log = get_history(pytorch_docs_dir)
208+
pytorch_docs_commits_to_files = get_file_names(
209+
os.path.expanduser("./pytorch"), "docs"
210+
)
197211

198-
# Upload data to S3 as csv with gzip compression and no header line
212+
# Combine the two histories
199213

200-
print(f"Uploading data to {METADATA_PATH}")
201214
history_log = [
202215
{
203216
"commit_id": i[0],
@@ -207,21 +220,54 @@ def main() -> None:
207220
"number_of_changed_files": int(i[4]),
208221
"lines_added": int(i[5]),
209222
"lines_deleted": int(i[6]),
223+
"repo": "tutorials",
210224
}
211-
for i in get_history_log
225+
for i in tutorials_history_log
212226
]
227+
228+
history_log.extend(
229+
[
230+
{
231+
"commit_id": i[0],
232+
"author": i[1],
233+
"date": i[2],
234+
"title": i[3],
235+
"number_of_changed_files": int(i[4]),
236+
"lines_added": int(i[5]),
237+
"lines_deleted": int(i[6]),
238+
"repo": "pytorch",
239+
}
240+
for i in pytorch_docs_history_log
241+
]
242+
)
243+
244+
# Combine the two commits to files
245+
246+
filenames = []
247+
for entry in tutorials_commits_to_files:
248+
items = convert_to_dict(entry)
249+
for item in items:
250+
item["filename"] = f"tutorials/{item['filename']}"
251+
filenames.extend(items)
252+
253+
for entry in pytorch_docs_commits_to_files:
254+
items = convert_to_dict(entry)
255+
for item in items:
256+
item["filename"] = f"pytorch/{item['filename']}"
257+
filenames.extend(items)
258+
259+
# Upload data to S3 as csv with gzip compression and no header line
260+
261+
print(f"Uploading data to {METADATA_PATH}")
213262
upload_to_s3(
214263
"ossci-raw-job-status",
215264
f"{METADATA_PATH}",
216265
history_log,
217266
)
218267
print(f"Finished uploading data to {METADATA_PATH}")
219268

269+
# Upload filenames to S3
220270
print(f"Uploading data to {FILENAMES_PATH}")
221-
filenames = []
222-
for entry in commits_to_files:
223-
items = convert_to_dict(entry)
224-
filenames.extend(items)
225271
upload_to_s3(
226272
"ossci-raw-job-status",
227273
f"{FILENAMES_PATH}",

.github/workflows/upload-tutorials-stats.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
1-
# This is a basic workflow to help you get started with Actions
2-
31
name: Upload tutorials stat
42

5-
# Controls when the workflow will run
63
on:
74
schedule:
85
# Run this once per day.
96
- cron: "0 0 * * *"
10-
# Allows you to run this workflow manually from the Actions tab
117
workflow_dispatch:
128

139
jobs:
@@ -32,6 +28,12 @@ jobs:
3228
path: './tutorials'
3329
fetch-depth: 0
3430

31+
- name: Checkout the pytorch repo
32+
uses: actions/checkout@v4
33+
with:
34+
path: './pytorch'
35+
fetch-depth: 0
36+
3537
- name: Checkout the test-infra repo
3638
uses: actions/checkout@v4
3739
with:

0 commit comments

Comments
 (0)