Skip to content

Commit 4c5ebc0

Browse files
bors[bot] and philberty authored
Merge #842
842: add script to graph the changes over time. r=philberty a=philberty. This script was used to get metrics and a graph for the 2021 end-of-year report. Co-authored-by: Philip Herron <[email protected]>
2 parents b06de30 + 04c1f56 commit 4c5ebc0

File tree

1 file changed

+171
-0
lines changed

1 file changed

+171
-0
lines changed

gcc/rust/monthly-diff.py

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#!/usr/bin/env python3
2+
3+
import matplotlib.pyplot as plt
4+
import numpy as np
5+
6+
import matplotlib
7+
import subprocess
8+
import sys
9+
import re
10+
import os
11+
12+
13+
# The only command-line argument is how many weeks of history to inspect,
# counting back from today.
try:
    number_of_weeks = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument supplied; ValueError: argument is not an integer.
    print("script requires an integer argument for the number of weeks")
    sys.exit(-1)

# Zero or negative weeks would leave nothing to average or plot further down
# (the per-week averages divide by the number of collected weeks).
if number_of_weeks < 1:
    print("number of weeks must be at least 1")
    sys.exit(-1)
18+
19+
20+
# The git commands below are run relative to the current directory, so insist
# on being started from the top of the checkout. `.git` is a directory in a
# normal clone but a plain file in a linked worktree or submodule checkout,
# so test for existence rather than for a directory.
cwd_is_root_repo = os.path.exists('.git')
if not cwd_is_root_repo:
    print("script must be ran from the root of the repo")
    sys.exit(-1)
24+
25+
26+
# Only history touching these paths is counted.
specific_paths = ['gcc/rust', 'gcc/testsuite/rust', 'gcc/testsuite/rust.test']

# Commit headers start at column 0. Anchoring the pattern keeps SHAs that are
# merely quoted inside a commit message (e.g. "This reverts commit <sha>")
# from being mistaken for commits of their own.
commit_header_re = re.compile(r'^commit ([0-9a-f]{40})', re.MULTILINE)

# Prefix of the author header line printed by `git show`.
author_prefix = 'Author: '


def _git_stdout(command):
    """Run *command* (an argv list) and return the text it wrote to stdout."""
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout


def _stat_count(label, short_stat):
    """Return the number preceding *label* in a --shortstat line, or 0.

    git omits the insertion or deletion part of the summary when there are
    none, so a missing label means zero rather than an error.
    """
    found = re.search(r'([0-9]+) ' + label, short_stat)
    return int(found.group(1)) if found else 0


# One entry per week, most recent week first.
change_list = []
for i in range(number_of_weeks):
    # Week i covers the window from (i + 1) weeks ago up to i weeks ago; the
    # most recent week is left open-ended so it runs up to now.
    since = "%i weeks ago" % (i + 1)
    until = "%i weeks ago" % i if i > 0 else None

    command = ["git", "whatchanged", "--since=%s" % since]
    if until is not None:
        command.append("--until=%s" % until)
    command += ['--'] + specific_paths

    raw_diff = _git_stdout(command)

    # Taken in the order git printed them; the code below treats index 0 as
    # the newest commit and index -1 as the oldest.
    commit_shas = commit_header_re.findall(raw_diff)

    changes = {
        'files_changed': 0,
        'insertions': 0,
        'deletions': 0,
        'contributors': set(),
        'contributions': list()
    }

    short_stat_out = None
    if len(commit_shas) == 1:
        short_stat_command = ['git', 'show', '%s' % commit_shas[0], '--shortstat']
        # The summary is the last non-empty line of the output.
        stat_lines = _git_stdout(short_stat_command).strip().splitlines()
        if stat_lines:
            short_stat_out = stat_lines[-1].strip()
    elif len(commit_shas) > 1:
        from_sha = commit_shas[-1]
        to_sha = commit_shas[0]
        # NOTE(review): this diff is not limited to specific_paths, and an
        # `A..B` diff leaves out the changes made by A itself, so the oldest
        # commit of the week is not counted. Left as-is so the reported
        # numbers stay comparable; confirm whether that is intended.
        short_stat_command = ['git', 'diff', '%s..%s' % (from_sha, to_sha), '--shortstat']
        short_stat_out = _git_stdout(short_stat_command).strip()

    if short_stat_out is not None:
        # pull out the numbers via regex; absent parts stay at zero
        changes['files_changed'] = _stat_count('file', short_stat_out)
        changes['insertions'] = _stat_count('insertion', short_stat_out)
        changes['deletions'] = _stat_count('deletion', short_stat_out)

    # figure out the unique contributors each week
    for sha in commit_shas:
        header = _git_stdout(["git", "show", "-s", sha])
        # Take everything after "Author: " verbatim ("Name <email>") instead
        # of validating it against a fixed character set, so names with
        # punctuation or non-ASCII letters are not rejected.
        author = next(
            (line[len(author_prefix):].strip()
             for line in header.splitlines()
             if line.startswith(author_prefix)),
            None,
        )
        if author is None:
            # No author header in the output (e.g. git failed); skip it.
            continue
        changes['contributors'].add(author)
        changes['contributions'].append(author)

    # lets see it then
    print(changes)

    # hack to get rid of the gcc-merge
    if changes['files_changed'] == 23432:
        if change_list:
            # Reuse the previously collected week in place of the outlier.
            change_list.append(change_list[-1])
        else:
            # Nothing collected yet to copy from: keep the week so the list
            # still has one entry per week, but drop the outlier's counts.
            changes.update(files_changed=0, insertions=0, deletions=0)
            change_list.append(changes)
    else:
        change_list.append(changes)
112+
113+
114+
# Everyone who authored at least one counted commit in any week.
unique_contributors = set()
for week in change_list:
    unique_contributors.update(week['contributors'])

# calculate average lines added and removed per week
# (guarded so that an empty change_list reports 0.0 instead of dividing by 0)
week_count = len(change_list)

total_ins_lines = sum(week['insertions'] for week in change_list)
avg_ins_lines_per_week = total_ins_lines / float(week_count) if week_count else 0.0

total_del_lines = sum(week['deletions'] for week in change_list)
avg_del_lines_per_week = total_del_lines / float(week_count) if week_count else 0.0

print("average lines added per week:", avg_ins_lines_per_week)
print("average lines deleted per week:", avg_del_lines_per_week)

# find number of contributions per unique_contributor
# Single pass over every recorded contribution instead of rescanning all
# weeks once per contributor.
num_contribs = dict.fromkeys(unique_contributors, 0)
for week in change_list:
    for author in week['contributions']:
        num_contribs[author] = num_contribs.get(author, 0) + 1

# (author, count) pairs, most active contributor first.
contribs = sorted(num_contribs.items(), key=lambda pair: pair[1], reverse=True)
for author, count in contribs:
    print(author, count)
146+
147+
148+
# graph the change_list
# change_list holds the most recent week first; every series below is built
# in the opposite order, paired with a descending "weeks ago" x value.
weeks = list(range(number_of_weeks - 1, -1, -1))

oldest_first = change_list[::-1]
insertions = [week['insertions'] for week in oldest_first]
deletions = [week['deletions'] for week in oldest_first]
# Collected alongside the other series but not drawn on either chart.
files_changed = [week['files_changed'] for week in oldest_first]
num_contributors = [len(week['contributors']) for week in oldest_first]

# Two stacked charts: line churn on top, contributor head-count below.
fix, axs = plt.subplots(2)
lines_axis, contributors_axis = axs

lines_axis.set_title("number of lines added + removed")
for series in (insertions, deletions):
    lines_axis.plot(weeks, series)

contributors_axis.set_title("number of contributors")
contributors_axis.plot(weeks, num_contributors)

plt.show()

0 commit comments

Comments
 (0)