-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpr-report.py
208 lines (182 loc) · 7.59 KB
/
pr-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
"""
This script shows the files deleted, modified, or renamed in a PR in a samples
repository (azureml-examples by default; azureai-samples and fabric-samples are also supported).
If any of these files are referenced in azure-ai-docs-pr,
the corresponding file (labeled referenced_from_file) is also shown.
To run this script, first run find_snippets.py to create the snippets file
(this script reads refs-found-<repo>.csv, where <repo> is ml, ai, or fabric).
Then run from command line:
python pr-report.py <PR number> [ml|ai|fabric]
To decide if the PR is safe to merge:
* If any deleted cell in a MODIFIED file is referenced in azure-ai-docs-pr, PR is not ready to merge.
* If any DELETED or RENAMED file is referenced, PR is not ready to merge.
"""
import pandas as pd
import sys
import gh_auth as a
import utilities as h
# read arguments from command line - pr and optionally, whether to authenticate
import argparse
# Command-line interface: a required PR number plus an optional repository key.
parser = argparse.ArgumentParser(description="Process a PR number.")
parser.add_argument("pr", type=int, help="The PR number you are interested in.")
parser.add_argument(
    "repo",
    # argparse applies `type` before it checks `choices`, so lower-casing here
    # lets "ML" / "Fabric" be accepted instead of rejected as invalid choices.
    type=str.lower,
    nargs="?",
    default="ml",
    choices=["ai", "ml", "fabric"],
    help="Which samples repository the PR belongs to: 'ai', 'ml', or 'fabric' (default: 'ml').",
)
args = parser.parse_args()
pr = args.pr
repo_arg = args.repo  # already lower-cased by the argument's `type`
# Raise pandas' column display width; intended to keep long paths from being
# truncated when pandas formats them for printing.
pd.set_option("display.max_colwidth", 500)
# Resolve the GitHub owner and repository for the selected key, then build the
# API URL that lists the files changed by the PR.
owner_name, repo_name = {
    "ml": ("Azure", "azureml-examples"),
    "ai": ("Azure-Samples", "azureai-samples"),
    "fabric": ("Microsoft", "fabric-samples"),
}[repo_arg]
# NOTE(review): the request asks for at most 100 files per page; whether further
# pages are followed depends on get_auth_response - confirm for very large PRs.
url = f"https://api.github.com/repos/{owner_name}/{repo_name}/pulls/{pr}/files?per_page=100"

# Opening banner followed by a link to the PR's "Files changed" page.
print(
    f"\n================ {repo_name} PR summary: {pr} ===================",
    f"https://github.com/{owner_name}/{repo_name}/pull/{pr}/files\n",
    sep="\n",
)

# Fetch the changed-file list and open a handle to the repository.
prfiles = a.get_auth_response(url)
repo = a.connect_repo(f"{owner_name}/{repo_name}")
# Stop early when the API call did not return a file list.
# Presumably an error response is a dict carrying a "message" key - confirm
# against get_auth_response.
if "message" in prfiles:
    print("Error occurred. Check the PR number and try again.")
    print(prfiles)
    # Exit non-zero so shells and CI can tell the report failed.
    sys.exit(1)

# Bucket the PR's files by the status reported for each entry.
# Entries with any other status are not listed in a bucket.
deleted_files = [
    file["filename"] for file in prfiles if file["status"] == "removed"
]
# Modified files keep their blob URL; it is handed to find_changes later.
modified_files = [
    (file["filename"], file["blob_url"])
    for file in prfiles
    if file["status"] == "modified"
]
added_files = [file["filename"] for file in prfiles if file["status"] == "added"]
# For renames, track the OLD path: that is the name existing references use.
renamed_files = [
    file["previous_filename"] for file in prfiles if file["status"] == "renamed"
]
# Load the reference table for the selected repository.
fn = f"refs-found-{repo_arg}.csv"
snippets = h.read_snippets(fn)

# Per-status counts, reused by the report sections that follow.
modified, deleted, renamed = (
    len(modified_files),
    len(deleted_files),
    len(renamed_files),
)

# Overview of the PR; the ADDED count is informational only.
print(
    f"PR {pr} changes {len(prfiles)} files.",
    f"ADDED: {len(added_files)}",
    f"MODIFIED: {modified}",
    f"DELETED: {deleted}",
    f"RENAMED: {renamed}\n",
    sep="\n",
)

# Accumulators filled by the modified-files section:
data = []  # one record per deleted cell/comment in a referenced, modified file
nb_mods = []  # blob URLs of modified notebooks
### MODIFIED FILES
# For every modified file that is referenced in the snippets table, report the
# named cells / code comments the PR deletes, and list modified notebooks.
if modified > 0:
    for file, blob_url in modified_files:
        referenced = snippets["ref_file"] == file
        if not referenced.any():
            continue  # nothing references this file
        snippet_match = snippets.loc[referenced, "from_file"]
        # Check if there are deleted nb named cells or code comments
        nb, adds, deletes, blob_url = h.find_changes(file, prfiles, blob_url)
        if nb:
            nb_mods.append(blob_url)
        # Anything deleted and also re-added is not reported as deleted.
        deleted_cells = [value for value in deletes if value not in adds]
        cell_type = "Notebook" if nb else "Code"
        referenced_in = snippet_match.to_string(index=False)
        for cell in deleted_cells:
            data.append(
                {
                    "Modified File": file,
                    "Referenced In": referenced_in,
                    "Cell Type": cell_type,
                    "Cell": cell,
                }
            )
    if not data:
        print(
            "No problems with any of the modified files.\n"
        )
    else:
        # Group the deleted cells per file. The cell type is part of the key so
        # each group prints its OWN type instead of whatever value the loop
        # above happened to leave behind.
        grouped_data = {}
        for item in data:
            key = (item["Modified File"], item["Referenced In"], item["Cell Type"])
            grouped_data.setdefault(key, []).append(item["Cell"])
        print(f"Potential problems found in {len(grouped_data)} files.")
        print(
            "Fix these references in azure-ai-docs-pr before approving this PR:\n"
        )
        for (modified_file, referenced_in, cell_type), cells in grouped_data.items():
            print(f"Modified File: {modified_file} \n Referenced in:")
            # NOTE(review): the link prefix is always articles/machine-learning,
            # whatever repo was selected - confirm for 'ai' and 'fabric'.
            for ref in referenced_in.split("\n"):
                print(
                    f" https://github.com/MicrosoftDocs/azure-ai-docs-pr/edit/main/articles/machine-learning/{ref.strip()}"
                )
            print(f" {cell_type} cells deleted: {len(cells)}")
            for cell in cells:
                print(f" * {cell}")
            # compare the sha to this same file in branch "temp-fix"
            # (use the file of THIS group, not the last file of the loop above)
            h.compare_branches(repo, modified_file, "main", "temp-fix")
    # also print all the modified notebooks
    if nb_mods:
        print(
            "MODIFIED NOTEBOOKS\nFollow each link to ensure notebooks are valid before approving the PR:"
        )
        # Remove duplicates but keep first-seen order so output is stable.
        nb_mods = list(dict.fromkeys(nb_mods))
        for notebook_url in nb_mods:
            print(f"* {notebook_url}\n")
### DELETED FILES
# List every deleted file that is still referenced, with links to the
# referencing articles, then print a verdict for the section.
if deleted > 0:
    found = 0  # number of deleted files that are still referenced
    for file in deleted_files:
        referenced = snippets["ref_file"] == file
        if not referenced.any():
            continue  # nothing references this file
        snippet_match = snippets.loc[referenced, "from_file"]
        print(f"DELETED FILE: {file} \n Referenced in:")
        refs = snippet_match.to_string(index=False).split("\n")
        # NOTE(review): the link prefix is always articles/machine-learning,
        # whatever repo was selected - confirm for 'ai' and 'fabric'.
        for ref in refs:
            print(
                f"* https://github.com/MicrosoftDocs/azure-ai-docs-pr/edit/main/articles/machine-learning/{ref.strip()}"
            )
        h.compare_branches(repo, file, "main", "temp-fix")
        found += 1  # was `found = +1`, which assigned 1 instead of counting
    if found == 0:
        print(
            "No problems with any of the deleted files.\n"
        )
    else:
        print("Fix all references to deleted files before approving this PR.\n")
### RENAMED FILES
# List every renamed file whose previous path is still referenced, with links
# to the referencing articles, then print a verdict for the section.
if renamed > 0:
    found = 0  # number of renamed files that are still referenced
    for file in renamed_files:
        referenced = snippets["ref_file"] == file
        if not referenced.any():
            continue  # nothing references this file
        snippet_match = snippets.loc[referenced, "from_file"]
        print(f"RENAMED FILE: {file} \n Referenced in:")
        refs = snippet_match.to_string(index=False).split("\n")
        # NOTE(review): the link prefix is always articles/machine-learning,
        # whatever repo was selected - confirm for 'ai' and 'fabric'.
        for ref in refs:
            print(
                f"* https://github.com/MicrosoftDocs/azure-ai-docs-pr/edit/main/articles/machine-learning/{ref.strip()}"
            )
        h.compare_branches(repo, file, "main", "temp-fix")
        found += 1  # was `found = +1`, which assigned 1 instead of counting
    if found == 0:
        print(
            "No problems with any of the renamed files.\n"
        )
    else:
        print("Fix all references to renamed files before approving this PR.\n")
# Closing banner; same text as the banner printed at the start of the report.
print(f"\n================ {repo_name} PR summary: {pr} ===================")
## test PRs:
# PR numbers that exercise each report path
# (presumably in azureml-examples, the default repo - confirm):
# 3081 - no problems
# 2890 - deletes files
# 2888 - deletes ids in a file
# 3113 - deletes a cell in a notebook
# 3210 - renames files we use