Skip to content

Commit 457b8d6

Browse files
committed
12.14
1 parent 1863155 commit 457b8d6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+669
-139
lines changed

main.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@ def run_default():
4141
# ======= NLP-based approaches ======= NGram, NGram_C
4242
# ======= Glance-XX approaches ======= Glance_MD, Glance_EA, Glance_LR
4343

44-
run_cross_release_predict(Glance_LR)
45-
44+
# run_cross_release_predict(Glance_LR)
45+
# run_cross_release_predict(Glance_EA)
46+
# run_cross_release_predict(Glance_MD)
47+
run_cross_release_predict(LineDP)
4648

4749
def parse_args():
4850
# If there is no additional parameters in the command line, run the default models.

result/Dis1_1/effort-total.csv

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
release,total_effort
2+
ambari-2.1.0,292084
3+
ambari-2.2.0,316411
4+
ambari-2.4.0,445436
5+
ambari-2.5.0,484406
6+
ambari-2.6.0,487664
7+
ambari-2.7.0,484089
8+
amq-5.1.0,190852
9+
amq-5.2.0,204975
10+
amq-5.4.0,260959
11+
amq-5.5.0,274603
12+
amq-5.6.0,306066
13+
amq-5.7.0,305607
14+
amq-5.8.0,311711
15+
amq-5.9.0,315307
16+
amq-5.10.0,323082
17+
amq-5.11.0,333415
18+
amq-5.12.0,350040
19+
amq-5.14.0,370472
20+
amq-5.15.0,373372
21+
bookkeeper-4.2.0,78715
22+
bookkeeper-4.4.0,60408
23+
calcite-1.8.0,327252
24+
calcite-1.11.0,341392
25+
calcite-1.13.0,273186
26+
calcite-1.15.0,288305
27+
calcite-1.16.0,295153
28+
calcite-1.17.0,299074
29+
calcite-1.18.0,308933
30+
cassandra-0.8.6,131184
31+
cassandra-1.0.9,144502
32+
cassandra-1.1.6,168159
33+
cassandra-1.1.11,170533
34+
cassandra-1.2.11,211949
35+
flink-1.6.0,608219
36+
groovy-1.5.5,109318
37+
groovy-1.6.0,141955
38+
groovy-1.7.3,156480
39+
groovy-1.7.6,159176
40+
groovy-1.8.1,176662
41+
groovy-1.8.7,183462
42+
groovy-2.1.0,203909
43+
groovy-2.1.6,206046
44+
groovy-2.4.4,243567
45+
groovy-2.4.6,244632
46+
groovy-2.4.8,245541
47+
groovy-2.5.0,279517
48+
groovy-2.5.5,280363
49+
hbase-0.94.5,280440
50+
hbase-0.98.0,606913
51+
hbase-0.98.5,623281
52+
hbase-0.98.11,654904
53+
hive-1.2.0,871574
54+
hive-2.0.0,1039823
55+
hive-2.1.0,1112941
56+
ignite-1.4.0,610699
57+
ignite-1.6.0,690475
58+
log4j2-2.1,79796
59+
log4j2-2.2,80161
60+
log4j2-2.3,80883
61+
log4j2-2.4,93481
62+
log4j2-2.5,103402
63+
log4j2-2.6,121815
64+
log4j2-2.7,132836
65+
log4j2-2.8,138778
66+
log4j2-2.9,145534
67+
log4j2-2.10,148721
68+
mahout-0.4,125301
69+
mahout-0.5,124968
70+
mahout-0.6,145593
71+
mahout-0.7,133660
72+
mahout-0.8,149368
73+
mng-3.1.0,91695
74+
mng-3.2.0,91886
75+
mng-3.3.0,98343
76+
mng-3.5.0,99215
77+
mng-3.6.0,100401
78+
nifi-1.2.0,416418
79+
nifi-1.5.0,489997
80+
nifi-1.8.0,560333
81+
nutch-1.3,45486
82+
nutch-1.4,44989
83+
nutch-1.5,46006
84+
nutch-1.6,46843
85+
nutch-1.7,47477
86+
nutch-1.8,48420
87+
nutch-1.9,47780
88+
nutch-1.10,55704
89+
nutch-1.12,69185
90+
nutch-1.13,70459
91+
nutch-1.14,72871
92+
nutch-1.15,77351
93+
storm-0.9.3,80758
94+
storm-1.0.0,221337
95+
storm-1.0.3,234518
96+
storm-1.0.5,235508
97+
tika-0.8,30890
98+
tika-0.9,34174
99+
tika-0.10,46748
100+
tika-1.1,50768
101+
tika-1.3,58365
102+
tika-1.5,61281
103+
tika-1.7,69500
104+
tika-1.10,87547
105+
tika-1.13,93972
106+
tika-1.15,117323
107+
tika-1.17,120209
108+
ww-2.0.5,69404
109+
ww-2.0.10,74189
110+
ww-2.1.1,75656
111+
ww-2.1.3,84193
112+
ww-2.1.7,88316
113+
ww-2.2.0,195385
114+
ww-2.2.2,197224
115+
ww-2.3.1,198580
116+
ww-2.3.4,199575
117+
ww-2.3.10,200322
118+
ww-2.3.15,202082
119+
ww-2.3.17,204101
120+
ww-2.3.20,205872
121+
ww-2.3.24,207064
122+
zookeeper-3.5.1,65270
123+
zookeeper-3.5.2,66505
124+
zookeeper-3.5.3,68039
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

src/exps/Dis1_1.py

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# -*- coding:utf-8 -*-
2+
3+
import sys
4+
5+
import warnings
6+
import pandas as pd
7+
from pandas import DataFrame
8+
9+
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
10+
from src.models.glance import *
11+
from statistics import *
12+
13+
output_path = '../../result/Dis1_1/'
14+
15+
16+
def get_effort_of_Glance(clf):
    """Write the per-release review effort of one Glance classifier to CSV.

    For every test release (all but the first release of each project) this
    reads the classifier's line-level result file, counts its predicted lines
    (minus the header) as review effort, pairs it with the pre-computed total
    effort from effort-total.csv, and saves release,review_effort,
    total_effort,ratio rows to {output_path}effort-{clf}.csv.

    :param clf: classifier name used in the result-file path (e.g. 'Glance-MD')
    """
    print(clf)
    # Skip the header row of the precomputed totals file; rows are assumed to
    # follow the release order produced by get_project_releases_dict().
    totals = read_data_from_file(f'{output_path}effort-total.csv')[1:]
    rows = ['release,review_effort,total_effort,ratio']
    idx = 0
    for project, releases in get_project_releases_dict().items():
        # The first release of each project is training-only, so start at 1.
        for pos in range(1, len(releases)):
            total_effort = int(totals[idx].split(',')[1])
            result_file = f'{root_path}Result/{clf}/line_result/{project}/{releases[pos]}-result.csv'
            # Subtract 1 for the header line of the result file.
            review_effort = len(read_data_from_file(result_file)) - 1
            rows.append(f'{releases[pos]},{review_effort},{total_effort},{round(review_effort / total_effort, 3)}')
            idx += 1
    save_csv_result(output_path, f'effort-{clf}.csv', '\n'.join(rows) + '\n')
29+
30+
31+
def summary():
    """Summarize the effort ratios of the three Glance variants per project.

    Reads the effort-Glance-{MD,EA,LR}.csv files produced by
    get_effort_of_Glance (rows must follow the release order of
    get_project_releases_dict()), takes the median ratio of each variant per
    project, and writes effort-comparison.csv to output_path.
    """
    MD = read_data_from_file(f'{output_path}effort-Glance-MD.csv')[1:]
    EA = read_data_from_file(f'{output_path}effort-Glance-EA.csv')[1:]
    LR = read_data_from_file(f'{output_path}effort-Glance-LR.csv')[1:]

    # NOTE(review): header says 'release' but each row carries a project name —
    # confirm whether the first column header should read 'project'.
    text = 'release,MD_effort,EA_effort,LR_effort\n'
    count = 0
    for project, releases in get_project_releases_dict().items():
        MD_data, EA_data, LR_data = [], [], []
        # One row per test release (all releases of the project but the first).
        for _ in range(len(releases) - 1):
            # Column 3 is the 'ratio' field written by get_effort_of_Glance.
            MD_data.append(float(MD[count].split(',')[3]))
            EA_data.append(float(EA[count].split(',')[3]))
            LR_data.append(float(LR[count].split(',')[3]))
            count += 1
        text += f'{project},{median(MD_data)},{median(EA_data)},{median(LR_data)}\n'

    save_csv_result(output_path, 'effort-comparison.csv', text)
48+
49+
50+
if __name__ == '__main__':
    # One-off regeneration of the per-classifier effort files; re-enable when
    # the line-level results change:
    # for clf in ['Glance-MD', 'Glance-EA', 'Glance-LR']:
    #     get_effort_of_Glance(clf)
    summary()

src/exps/Dis1.py src/exps/Dis1_2.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
'75%', '80%', '85%', '90%', '95%', '100%', ]
2020
indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
2121

22-
output_path = '../../result/Dis1/'
22+
output_path = '../../result/Dis1_2/'
2323
make_path(output_path)
2424

2525

src/exps/Dis1_3.py

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import sys
4+
5+
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
6+
from statistics import mean, median
7+
import pandas as pd
8+
from pandas import DataFrame
9+
10+
from src.models.glance import *
11+
12+
output_path = '../../result/Dis1_3/'
13+
14+
15+
def select_models():
    """
    Select the Glance model variants to compare in this experiment.

    :return: A list of model instances.
    """
    # , Glance_B_NT_noCC(), Glance_B_NFC_noCC
    return [Glance_LR(), Glance_B_noCC(), Glance_B_NT(), Glance_B_NT_noCC(), Glance_B_NFC(), Glance_B_NFC_noCC()]
23+
24+
25+
def collect_line_level_summary_result(eva_method=None):
    """Summarize line-level performance of the selected models into one CSV.

    For each aggregation method (default: mean and median), writes one table
    with a row per model and a column per indicator, then saves everything to
    Performance_Summary.csv under output_path.

    :param eva_method: list of aggregation callables applied to each
        indicator column; defaults to [mean, median].
    """
    if eva_method is None:
        eva_method = [mean, median]
    # Indicator column names in the per-model evaluation files; 'ifa' is
    # reported as an integer, all others rounded to 3 decimals.
    indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
    text = ''
    for method in eva_method:
        text += 'Approach,Recall,FAR,CE,D2H,MCC,IFA,Recall@20%,ratio\n'
        for model in select_models():
            df = pd.read_csv(model.line_level_evaluation_file)
            values = []
            for indicator in indicators:
                aggregated = method(list(df[indicator]))
                values.append(str(int(aggregated)) if indicator == 'ifa' else str(round(aggregated, 3)))
            text += model.model_name + ',' + ','.join(values) + '\n'
        # Blank line separates the tables of the different aggregation methods.
        text += '\n'
    save_csv_result(output_path, 'Performance_Summary.csv', text)
47+
48+
49+
# =================== Line level result in terms of different Performance Indicators experiments ================
50+
def collect_line_level_by_indicators():
51+
models = select_models()
52+
indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
53+
for indicator in indicators:
54+
data = dict()
55+
for model in models:
56+
data[model.model_name] = pd.read_csv(model.line_level_evaluation_file)[indicator].tolist()
57+
58+
ratio = DataFrame(data, columns=[model.model_name for model in models])
59+
ratio.to_csv(f'{output_path}Performance Indicators/{indicator}.csv', index=False)
60+
61+
62+
if __name__ == '__main__':
    collect_line_level_summary_result()
    collect_line_level_by_indicators()

src/exps/Dis3.py

-4
This file was deleted.

src/exps/RQ23_Difference.py

-38
Original file line numberDiff line numberDiff line change
@@ -81,44 +81,6 @@ def classification_difference(exp="RQ2"):
8181
save_csv_result(f'../../result/{exp}/', 'Difference_summary.csv', text)
8282

8383

84-
def diff_ranking():
85-
access_model = 'AccessModel'
86-
target_model = 'CheckStyleModel' # PMDModel CheckStyleModel
87-
text = ''
88-
for project, releases in get_project_releases_dict().items():
89-
90-
print(project, end=',')
91-
increase, decrease = [], []
92-
for release in releases[1:]:
93-
data_access = load_pk_result(f'{result_path}Diff_Ranking/{access_model}/{release}.pk')
94-
data_target = load_pk_result(f'{result_path}Diff_Ranking/{target_model}/{release}.pk')
95-
i, d = [], []
96-
t, text_lines, label, filename = read_file_level_dataset(release)
97-
len_dict = {}
98-
for index in range(len(filename)):
99-
len_dict[filename[index]] = len(text_lines[index])
100-
101-
for file_line in data_target.keys():
102-
if file_line not in data_access.keys():
103-
continue
104-
rank_of_access = data_access[file_line]
105-
rank_of_target = data_target[file_line]
106-
# increase
107-
if rank_of_access < rank_of_target:
108-
i.append((rank_of_target - rank_of_access) / len_dict[file_line.split(':')[0]])
109-
# decrease
110-
if rank_of_access > rank_of_target:
111-
d.append((rank_of_access - rank_of_target) / len_dict[file_line.split(':')[0]])
112-
113-
increase.append(mean(i))
114-
decrease.append(mean(d))
115-
116-
text += project + ',' + str(increase).replace('[', '').replace(']', '') + '\n'
117-
text += project + ',' + str(decrease).replace('[', '').replace(']', '') + '\n'
118-
print(text)
119-
save_result('C:/Users/GZQ/Desktop/d.csv', text)
120-
121-
12284
if __name__ == '__main__':
12385
# diff_ranking()
12486
for experiment in ["RQ2", "RQ3"]:

src/models/base_model.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -242,15 +242,16 @@ def analyze_line_level_result(self):
242242
ifa, r_20 = self.rank_strategy_3() # Strategy 1
243243

244244
################################ Bug hit ratio ################################################
245-
buggy_lines_dict = read_dict_from_file(f'{self.commit_buggy_path}/{self.test_release}_commit_buggy_lines.csv')
245+
# buggy_lines_dict = read_dict_from_file(f'{self.commit_buggy_path}/{self.test_release}_commit_buggy_lines.csv')
246+
buggy_lines_dict = {} # = read_dict_from_file(f'{self.commit_buggy_path}/{self.test_release}_commit_buggy_lines.csv')
246247
total_bugs = len(buggy_lines_dict.keys())
247248
hit_bugs = set()
248249
for line in self.predicted_buggy_lines:
249250
for bug_commit, lines in buggy_lines_dict.items():
250251
if line in lines:
251252
hit_bugs.add(bug_commit)
252253

253-
ratio = round(len(hit_bugs) / total_bugs, 3)
254+
ratio = 0 if total_bugs == 0 else round(len(hit_bugs) / total_bugs, 3)
254255

255256
################################ Output the evaluation result ################################################
256257
append_title = True if not os.path.exists(self.line_level_evaluation_file) else False

0 commit comments

Comments
 (0)