Skip to content

Commit 86e5d2f

Browse files
committedNov 25, 2021
RQ2,RQ3 finish!
1 parent 0fdc1a8 commit 86e5d2f

14 files changed

+408
-269
lines changed
 

‎.idea/encodings.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/rAvailablePackageCache.xml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/rSettings.xml

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎main.py

+8-21
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
# ========================= Run RQ1 experiments =================================
2323
def run_cross_release_predict(prediction_model):
2424
for project, releases in get_project_releases_dict().items():
25-
print(f'========== {prediction_model.model_name} CR PREDICTION for {project} ========================'[:60])
2625
for i in range(len(releases) - 1):
2726
# 1. Loading data. train data index = i, test data index = i + 1
27+
print(f'========== {prediction_model.model_name} CR PREDICTION for {releases[i + 1]} ================'[:60])
2828
model = prediction_model(releases[i], releases[i + 1])
2929

3030
model.file_level_prediction()
@@ -35,33 +35,20 @@ def run_cross_release_predict(prediction_model):
3535

3636

3737
def run_default():
38-
# ======================= MI-based approaches =============================
39-
# run_cross_release_predict(TMI_LR)
40-
# run_cross_release_predict(TMI_SVM)
41-
# run_cross_release_predict(TMI_MNB)
42-
# run_cross_release_predict(TMI_DT)
43-
# run_cross_release_predict(TMI_RF)
44-
# run_cross_release_predict(LineDP)
38+
# Optional approaches
39+
# ======= MIT-based approaches ======= TMI_LR, TMI_SVM, TMI_MNB, TMI_DT, TMI_RF, LineDP
40+
# ======= SAT-based approaches ======= PMD, CheckStyle
41+
# ======= NLP-based approaches ======= NGram, NGram_C
42+
# ======= Glance-XX approaches ======= Glance_MD, Glance_EA, Glance_LR
4543

46-
# ======================= SAT-based approaches ============================
47-
# run_cross_release_predict(PMD)
48-
# run_cross_release_predict(CheckStyle)
49-
50-
# ======================= LM-based approaches =============================
51-
# run_cross_release_predict(NGram)
52-
# run_cross_release_predict(NGram_C)
53-
54-
# ======================= CM-based approaches =============================
55-
# run_cross_release_predict(Glance_EA)
56-
# run_cross_release_predict(Glance_MD)
57-
# run_cross_release_predict(Glance_Resort)
5844
run_cross_release_predict(Glance_LR)
5945

6046

6147
def parse_args():
62-
# 没有参数列表,执行当前程序中定义的方法
48+
# If there are no additional parameters on the command line, run the default models.
6349
if len(sys.argv) == 1:
6450
run_default()
51+
# Run the specific models.
6552
else:
6653
model_name = sys.argv[1]
6754
run_cross_release_predict(MODEL_DICT[model_name])

‎src/exps/Dis1.py

+76-37
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,89 @@
1-
# -*- coding:utf-8 -*-
1+
# -*- coding: utf-8 -*-
22

3-
def extract():
4-
with open('a.csv', 'r') as file:
5-
d = file.readlines()
3+
import sys
64

7-
text = ''
8-
flag = False
5+
import warnings
6+
import pandas as pd
7+
from pandas import DataFrame
98

10-
last_effort = 0
9+
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
10+
from src.models.glance import *
11+
from statistics import *
1112

12-
for line in d:
13-
if '=' in line:
14-
last_name = line.split(' ')[-2]
15-
text += last_name + '\n'
16-
print(last_name)
17-
else:
18-
if flag:
19-
text += line
20-
flag = False
21-
else:
22-
flag = True
13+
# Ignore warning information
14+
warnings.filterwarnings('ignore')
2315

24-
with open('r.csv', 'w') as file:
25-
file.write(text)
16+
# .05, .10, .15, .20, .25, .30, .35, .40, .45, .50, .55, .60, .65, .70, .75, .80, .85, .90, .95, 1
17+
line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, .45, .50, ]
18+
indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
2619

20+
output_path = '../../result/Dis1/'
21+
make_path(output_path)
2722

28-
def trans():
29-
with open('r.csv', 'r') as file:
30-
data = file.readlines()
3123

32-
text = ''
33-
last_name = data[0].strip()
34-
temp = ''
35-
for line in data[1:]:
36-
if '[' not in line:
37-
text += last_name + ',' + temp + '\n'
38-
last_name = line.strip()
39-
temp = ''
40-
else:
41-
temp += line.strip().replace('[', '').replace(']', '') + ', '
24+
def select_model(file_level_classifier, line_level_threshold, train='', test=''):
25+
if file_level_classifier == 'MD':
26+
model = Glance_MD(train, test, line_threshold=line_level_threshold, test=True)
27+
elif file_level_classifier == 'EA':
28+
model = Glance_EA(train, test, line_threshold=line_level_threshold, test=True)
29+
else:
30+
model = Glance_LR(train, test, line_threshold=line_level_threshold, test=True)
31+
return model
4232

43-
text += last_name + ',' + temp + '\n'
4433

45-
with open('r2.csv', 'w') as file:
46-
file.write(text)
34+
def search_parameter_Glance(clf):
35+
for threshold in line_level_thresholds:
36+
for project, releases in get_project_releases_dict().items():
37+
for i in range(len(releases) - 1):
38+
# 1. Loading data. train data index = i, test data index = i + 1
39+
model = select_model(clf, threshold, releases[i], releases[i + 1])
40+
41+
print(f'========== {model.model_name} CR PREDICTION for {releases[i + 1]} =================='[:60])
42+
model.file_level_prediction()
43+
model.analyze_file_level_result()
44+
45+
model.line_level_prediction()
46+
model.analyze_line_level_result()
47+
48+
49+
def test_parameter(clf):
50+
print(f'======================== Glance {clf} ===========================')
51+
eva_method = [mean, median]
52+
53+
for method in eva_method:
54+
# Summary data laid out horizontally; the column names are the thresholds
55+
summary_data_horizontal, summary_data_vertical = list(), dict()
56+
for indicator in indicators:
57+
detail_data, column_names, mean_list = dict(), list(), list()
58+
for threshold in line_level_thresholds:
59+
model = select_model(clf, threshold)
60+
column_names.append(model.model_name)
61+
detail_data[model.model_name] = list(pd.read_csv(model.line_level_evaluation_file)[indicator])
62+
63+
mean_list.append(round(method(detail_data[model.model_name]), 3))
64+
65+
summary_data_horizontal.append(mean_list)
66+
summary_data_vertical[indicator] = mean_list
67+
68+
detail_result = DataFrame(detail_data, index=get_test_releases_list(), columns=column_names)
69+
70+
make_path(f'{output_path}Glance-{clf}/')
71+
detail_result.to_csv(f'{output_path}Glance-{clf}/{method.__name__}-{indicator}.csv', index=True)
72+
73+
# '5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%',
74+
# '55%', '60%', '65%', '70%', '75%', '80%', '85%', '90%', '95%', '100%',
75+
threshold_indices = ['5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%', ]
76+
summary_result = DataFrame(summary_data_horizontal, index=indicators, columns=threshold_indices)
77+
summary_result.to_csv(f'{output_path}Dis1-summary-{method.__name__}-Glance-{clf}-horizontal.csv', index=True)
78+
summary_result = DataFrame(summary_data_vertical, index=threshold_indices, columns=indicators)
79+
summary_result.to_csv(f'{output_path}Dis1-summary-{method.__name__}-Glance-{clf}-vertical.csv', index=True)
4780

4881

4982
if __name__ == '__main__':
50-
trans()
83+
#
84+
file_level_classifiers = ['MD', 'EA', 'LR']
85+
for classifier in file_level_classifiers:
86+
# search_parameter_Glance(classifier)
87+
test_parameter(classifier)
88+
pass
89+
pass

‎src/exps/RQ1.py

+16-47
Original file line numberDiff line numberDiff line change
@@ -8,76 +8,45 @@
88

99
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
1010
from src.models.glance import *
11-
from statistics import *
1211

1312
# Ignore warning information
1413
warnings.filterwarnings('ignore')
1514

16-
line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, .45, .50]
1715
indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
1816

1917
output_path = '../../result/RQ1/'
2018
make_path(output_path)
2119

2220

23-
def select_model(file_level_classifier, line_level_threshold, train='', test=''):
21+
def select_model(file_level_classifier, train='', test=''):
2422
if file_level_classifier == 'MD':
25-
model = Glance_MD(train, test, line_threshold=line_level_threshold, test=True)
23+
model = Glance_MD(train, test)
2624
elif file_level_classifier == 'EA':
27-
model = Glance_EA(train, test, line_threshold=line_level_threshold, test=True)
25+
model = Glance_EA(train, test)
2826
else:
29-
model = Glance_LR(train, test, line_threshold=line_level_threshold, test=True)
27+
model = Glance_LR(train, test)
3028
return model
3129

3230

33-
def search_parameter_Glance(clf):
34-
for threshold in line_level_thresholds:
35-
for project, releases in get_project_releases_dict().items():
36-
for i in range(len(releases) - 1):
37-
# 1. Loading data. train data index = i, test data index = i + 1
38-
model = select_model(clf, threshold, releases[i], releases[i + 1])
39-
40-
print(f'========== {model.model_name} CR PREDICTION for {releases[i + 1]} =================='[:60])
41-
model.file_level_prediction()
42-
model.analyze_file_level_result()
43-
44-
model.line_level_prediction()
45-
model.analyze_line_level_result()
46-
47-
4831
def test_parameter(clf):
49-
print(f'Glance {clf}')
50-
# 水平展示的变化数据, 列名为与之
51-
summary_data_horizontal, summary_data_vertical = list(), dict()
52-
for indicator in indicators:
53-
detail_data, column_names, mean_list = dict(), list(), list()
54-
for threshold in line_level_thresholds:
55-
model = select_model(clf, threshold)
56-
column_names.append(model.model_name)
57-
detail_data[model.model_name] = list(pd.read_csv(model.line_level_evaluation_file)[indicator])
58-
59-
mean_list.append(round(mean(detail_data[model.model_name]), 3))
32+
print(f'======================== Glance {clf} ===========================')
33+
detail_data, column_names = list(), list()
34+
model = select_model(clf)
35+
data = pd.read_csv(model.line_level_evaluation_file)[indicators]
36+
last = 0
37+
for project, release in get_project_releases_dict().items():
38+
start, end = last, last + len(release[1:])
39+
detail_data.append(list(data.iloc[start:end].mean(axis=0)))
40+
column_names.append(project)
41+
last = end
6042

61-
summary_data_horizontal.append(mean_list)
62-
summary_data_vertical[indicator] = mean_list
63-
64-
detail_result = DataFrame(detail_data, index=get_test_releases_list(), columns=column_names)
65-
66-
make_path(f'{output_path}RQ1-Glance-{clf}/')
67-
detail_result.to_csv(f'{output_path}RQ1-Glance-{clf}/{indicator}.csv', index=True)
68-
69-
threshold_indices = ['5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%', ]
70-
summary_result = DataFrame(summary_data_horizontal, index=indicators, columns=threshold_indices)
71-
summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-horizontal.csv', index=True)
72-
summary_result = DataFrame(summary_data_vertical, index=threshold_indices, columns=indicators)
73-
summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-vertical.csv', index=True)
43+
summary_result = DataFrame(detail_data, index=get_project_list(), columns=indicators)
44+
summary_result.to_csv(f'{output_path}Glance-{clf}.csv', index=True)
7445

7546

7647
if __name__ == '__main__':
7748
#
7849
file_level_classifiers = ['MD', 'EA', 'LR']
7950
for classifier in file_level_classifiers:
80-
# search_parameter_Glance(classifier)
8151
test_parameter(classifier)
82-
8352
pass

0 commit comments

Comments
 (0)
Please sign in to comment.