Skip to content

Commit 0fdc1a8

Browse files
committed Nov 23, 2021
RQ1 finish!
1 parent dd57a0e commit 0fdc1a8

File tree

9 files changed

+376
-208
lines changed

9 files changed

+376
-208
lines changed
 

‎README.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,8 @@
1-
# CLDP
2-
Code Line-level Defect Prediction
1+
# CLBI
2+
Code Line-level Bugginess Identification.
3+
4+
5+
6+
7+
### Script Command
8+
python main.py [model_name]

‎main.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding:utf-8 -*-
22
import warnings
3-
3+
import sys
44
from src.models.tools import *
55
from src.models.explain import *
66
from src.models.glance import *
@@ -10,6 +10,14 @@
1010
warnings.filterwarnings('ignore')
1111
simplefilter(action='ignore', category=FutureWarning)
1212

13+
# Maps each model name accepted on the command line to the Python class that implements it
14+
MODEL_DICT = {'TMI-LR': TMI_LR, 'TMI-SVM': TMI_SVM, 'TMI-MNB': TMI_MNB, 'TMI-DT': TMI_DT, 'TMI-RF': TMI_RF,
15+
'LineDP': LineDP,
16+
'PMD': PMD, 'CheckStyle': CheckStyle,
17+
'NGram': NGram, 'NGram-C': NGram_C,
18+
'Glance-EA': Glance_EA, 'Glance-MD': Glance_MD, 'Glance-LR': Glance_LR,
19+
}
20+
1321

1422
# ========================= Run RQ1 experiments =================================
1523
def run_cross_release_predict(prediction_model):
@@ -26,7 +34,7 @@ def run_cross_release_predict(prediction_model):
2634
model.analyze_line_level_result()
2735

2836

29-
if __name__ == '__main__':
37+
def run_default():
3038
# ======================= MI-based approaches =============================
3139
# run_cross_release_predict(TMI_LR)
3240
# run_cross_release_predict(TMI_SVM)
@@ -46,6 +54,18 @@ def run_cross_release_predict(prediction_model):
4654
# ======================= CM-based approaches =============================
4755
# run_cross_release_predict(Glance_EA)
4856
# run_cross_release_predict(Glance_MD)
49-
run_cross_release_predict(Glance2)
57+
# run_cross_release_predict(Glance_Resort)
58+
run_cross_release_predict(Glance_LR)
59+
5060

51-
pass
61+
def parse_args():
62+
# No command-line arguments were given: run the methods defined in this program
63+
if len(sys.argv) == 1:
64+
run_default()
65+
else:
66+
model_name = sys.argv[1]
67+
run_cross_release_predict(MODEL_DICT[model_name])
68+
69+
70+
if __name__ == '__main__':
71+
parse_args()

‎src/exps/RQ1.py

+57-42
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,83 @@
11
# -*- coding: utf-8 -*-
22

33
import sys
4-
from statistics import median, mean
5-
6-
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
74

5+
import warnings
86
import pandas as pd
97
from pandas import DataFrame
8+
9+
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
1010
from src.models.glance import *
11+
from statistics import *
12+
13+
# Ignore warning information
14+
warnings.filterwarnings('ignore')
1115

16+
line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, .45, .50]
17+
indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
1218

13-
def run_Glance(prediction_model, line_level_threshold, effort_aware):
14-
for project, releases in get_project_releases_dict().items():
15-
print(f'========== {prediction_model.model_name} CR PREDICTION for {project} =================='[:60])
16-
for i in range(len(releases) - 1):
17-
# 1. Loading data. train data index = i, test data index = i + 1
18-
model = prediction_model(releases[i], releases[i + 1],
19-
line_level_threshold=line_level_threshold,
20-
effort_aware=effort_aware,
21-
test=True)
19+
output_path = '../../result/RQ1/'
20+
make_path(output_path)
2221

23-
model.file_level_prediction()
24-
model.analyze_file_level_result()
2522

26-
model.line_level_prediction()
27-
model.analyze_line_level_result()
23+
def select_model(file_level_classifier, line_level_threshold, train='', test=''):
24+
if file_level_classifier == 'MD':
25+
model = Glance_MD(train, test, line_threshold=line_level_threshold, test=True)
26+
elif file_level_classifier == 'EA':
27+
model = Glance_EA(train, test, line_threshold=line_level_threshold, test=True)
28+
else:
29+
model = Glance_LR(train, test, line_threshold=line_level_threshold, test=True)
30+
return model
2831

2932

30-
def search_parameter(effort_aware=True):
31-
line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, ]
33+
def search_parameter_Glance(clf):
34+
for threshold in line_level_thresholds:
35+
for project, releases in get_project_releases_dict().items():
36+
for i in range(len(releases) - 1):
37+
# 1. Loading data. train data index = i, test data index = i + 1
38+
model = select_model(clf, threshold, releases[i], releases[i + 1])
3239

33-
for threshold in line_level_thresholds[::-1]:
34-
run_Glance(Glance, threshold, effort_aware)
40+
print(f'========== {model.model_name} CR PREDICTION for {releases[i + 1]} =================='[:60])
41+
model.file_level_prediction()
42+
model.analyze_file_level_result()
3543

44+
model.line_level_prediction()
45+
model.analyze_line_level_result()
3646

37-
def test_parameter(effort_aware=True):
38-
line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, ]
3947

40-
data = dict()
41-
names = list()
48+
def test_parameter(clf):
49+
print(f'Glance {clf}')
50+
# Data on how the results vary, laid out horizontally; the column names are the thresholds
51+
summary_data_horizontal, summary_data_vertical = list(), dict()
52+
for indicator in indicators:
53+
detail_data, column_names, mean_list = dict(), list(), list()
54+
for threshold in line_level_thresholds:
55+
model = select_model(clf, threshold)
56+
column_names.append(model.model_name)
57+
detail_data[model.model_name] = list(pd.read_csv(model.line_level_evaluation_file)[indicator])
4258

43-
mean_list = list()
44-
for threshold in line_level_thresholds[::-1]:
45-
m = Glance(line_level_threshold=threshold, effort_aware=effort_aware, test=True)
46-
df = pd.read_csv(m.line_level_evaluation_file)
47-
data[m.model_name] = list(df['d2h'])
48-
names.append(m.model_name)
49-
mean_list.append(mean(data[m.model_name]))
50-
print(mean_list)
59+
mean_list.append(round(mean(detail_data[model.model_name]), 3))
5160

52-
result = DataFrame(data, columns=names)
61+
summary_data_horizontal.append(mean_list)
62+
summary_data_vertical[indicator] = mean_list
5363

54-
if effort_aware:
55-
result.to_csv(f'../../result/RQ1/RQ1-D2H-EA.csv', index=False)
56-
else:
57-
result.to_csv(f'../../result/RQ1/RQ1-D2H-MD.csv', index=False)
64+
detail_result = DataFrame(detail_data, index=get_test_releases_list(), columns=column_names)
65+
66+
make_path(f'{output_path}RQ1-Glance-{clf}/')
67+
detail_result.to_csv(f'{output_path}RQ1-Glance-{clf}/{indicator}.csv', index=True)
68+
69+
threshold_indices = ['5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%', ]
70+
summary_result = DataFrame(summary_data_horizontal, index=indicators, columns=threshold_indices)
71+
summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-horizontal.csv', index=True)
72+
summary_result = DataFrame(summary_data_vertical, index=threshold_indices, columns=indicators)
73+
summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-vertical.csv', index=True)
5874

5975

6076
if __name__ == '__main__':
6177
#
62-
# search_parameter(True)
63-
# test_parameter(True)
64-
65-
search_parameter(False)
66-
test_parameter(False)
78+
file_level_classifiers = ['MD', 'EA', 'LR']
79+
for classifier in file_level_classifiers:
80+
# search_parameter_Glance(classifier)
81+
test_parameter(classifier)
6782

6883
pass

‎src/exps/RQ2.py

+2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
from statistics import mean, median
33

44
from pandas import DataFrame
5+
import sys
56

7+
sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
68
from src.models.explain import *
79
from src.models.natural import *
810
from src.models.tools import *

‎src/models/base_model.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@
1212

1313

1414
class BaseModel(object):
15-
threshold = 50
1615
model_name = 'BaseModel' # must be overridden in each subclass
1716

18-
def __init__(self, train_release: str = '', test_release: str = ''):
17+
def __init__(self, train_release: str = '', test_release: str = '', test_result_path=''):
1918
# Specify the actual name of each folder or file
2019

2120
self.result_path = f'{root_path}Result/{self.model_name}/'
21+
if test_result_path != '':
22+
self.result_path = test_result_path
2223
# folder path
2324
self.file_level_result_path = f'{self.result_path}file_result/'
2425
self.line_level_result_path = f'{self.result_path}line_result/'

0 commit comments

Comments
 (0)