Naplues · Nov 25, 2021
diff --git a/‎.idea/encodings.xml
+6 b/‎.idea/encodings.xml
+6
diff --git a/‎.idea/rAvailablePackageCache.xml
+1-1 b/‎.idea/rAvailablePackageCache.xml
+1-1
diff --git a/‎.idea/rSettings.xml
+1-1 b/‎.idea/rSettings.xml
+1-1
diff --git a/‎main.py
+8-21 b/‎main.py
+8-21
diff --git a/‎src/exps/Dis1.py
+76-37 b/‎src/exps/Dis1.py
+76-37
diff --git a/‎src/exps/RQ1.py
+16-47 b/‎src/exps/RQ1.py
+16-47
@@ -22,9 +22,9 @@
 # ========================= Run RQ1 experiments =================================
 def run_cross_release_predict(prediction_model):
     for project, releases in get_project_releases_dict().items():
-        print(f'========== {prediction_model.model_name} CR PREDICTION for {project} ========================'[:60])
         for i in range(len(releases) - 1):
             # 1. Loading data. train data index = i, test data index = i + 1
+            print(f'========== {prediction_model.model_name} CR PREDICTION for {releases[i + 1]} ================'[:60])
             model = prediction_model(releases[i], releases[i + 1])
 
             model.file_level_prediction()
@@ -35,33 +35,20 @@ def run_cross_release_predict(prediction_model):
 
 
 def run_default():
-    # ======================= MI-based approaches =============================
-    # run_cross_release_predict(TMI_LR)
-    # run_cross_release_predict(TMI_SVM)
-    # run_cross_release_predict(TMI_MNB)
-    # run_cross_release_predict(TMI_DT)
-    # run_cross_release_predict(TMI_RF)
-    # run_cross_release_predict(LineDP)
+    # Optional approaches
+    # ======= MIT-based approaches ======= TMI_LR, TMI_SVM, TMI_MNB, TMI_DT, TMI_RF, LineDP
+    # ======= SAT-based approaches ======= PMD, CheckStyle
+    # ======= NLP-based approaches ======= NGram, NGram_C
+    # ======= Glance-XX approaches ======= Glance_MD, Glance_EA, Glance_LR
 
-    # ======================= SAT-based approaches ============================
-    # run_cross_release_predict(PMD)
-    # run_cross_release_predict(CheckStyle)
-
-    # ======================= LM-based approaches =============================
-    # run_cross_release_predict(NGram)
-    # run_cross_release_predict(NGram_C)
-
-    # ======================= CM-based approaches =============================
-    # run_cross_release_predict(Glance_EA)
-    # run_cross_release_predict(Glance_MD)
-    # run_cross_release_predict(Glance_Resort)
     run_cross_release_predict(Glance_LR)
 
 
 def parse_args():
-    # 没有参数列表,执行当前程序中定义的方法
+    # If there is no additional parameters in the command line, run the default models.
     if len(sys.argv) == 1:
         run_default()
+    # Run the specific models.
     else:
         model_name = sys.argv[1]
         run_cross_release_predict(MODEL_DICT[model_name])
 
@@ -1,50 +1,89 @@
-# -*- coding:utf-8 -*-
+# -*- coding: utf-8 -*-
 
-def extract():
-    with open('a.csv', 'r') as file:
-        d = file.readlines()
+import sys
 
-    text = ''
-    flag = False
+import warnings
+import pandas as pd
+from pandas import DataFrame
 
-    last_effort = 0
+sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
+from src.models.glance import *
+from statistics import *
 
-    for line in d:
-        if '=' in line:
-            last_name = line.split(' ')[-2]
-            text += last_name + '\n'
-            print(last_name)
-        else:
-            if flag:
-                text += line
-                flag = False
-            else:
-                flag = True
+# Ignore warning information
+warnings.filterwarnings('ignore')
 
-    with open('r.csv', 'w') as file:
-        file.write(text)
+# .05, .10, .15, .20, .25, .30, .35, .40, .45, .50, .55, .60, .65, .70, .55, .80, .85, .90, .95, 1
+line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, .45, .50, ]
+indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
 
+output_path = '../../result/Dis1/'
+make_path(output_path)
 
-def trans():
-    with open('r.csv', 'r') as file:
-        data = file.readlines()
 
-    text = ''
-    last_name = data[0].strip()
-    temp = ''
-    for line in data[1:]:
-        if '[' not in line:
-            text += last_name + ',' + temp + '\n'
-            last_name = line.strip()
-            temp = ''
-        else:
-            temp += line.strip().replace('[', '').replace(']', '') + ', '
+def select_model(file_level_classifier, line_level_threshold, train='', test=''):
+    if file_level_classifier == 'MD':
+        model = Glance_MD(train, test, line_threshold=line_level_threshold, test=True)
+    elif file_level_classifier == 'EA':
+        model = Glance_EA(train, test, line_threshold=line_level_threshold, test=True)
+    else:
+        model = Glance_LR(train, test, line_threshold=line_level_threshold, test=True)
+    return model
 
-    text += last_name + ',' + temp + '\n'
 
-    with open('r2.csv', 'w') as file:
-        file.write(text)
+def search_parameter_Glance(clf):
+    for threshold in line_level_thresholds:
+        for project, releases in get_project_releases_dict().items():
+            for i in range(len(releases) - 1):
+                # 1. Loading data. train data index = i, test data index = i + 1
+                model = select_model(clf, threshold, releases[i], releases[i + 1])
+
+                print(f'========== {model.model_name} CR PREDICTION for {releases[i + 1]} =================='[:60])
+                model.file_level_prediction()
+                model.analyze_file_level_result()
+
+                model.line_level_prediction()
+                model.analyze_line_level_result()
+
+
+def test_parameter(clf):
+    print(f'======================== Glance {clf} ===========================')
+    eva_method = [mean, median]
+
+    for method in eva_method:
+        # 水平展示的变化数据, 列名为阈值
+        summary_data_horizontal, summary_data_vertical = list(), dict()
+        for indicator in indicators:
+            detail_data, column_names, mean_list = dict(), list(), list()
+            for threshold in line_level_thresholds:
+                model = select_model(clf, threshold)
+                column_names.append(model.model_name)
+                detail_data[model.model_name] = list(pd.read_csv(model.line_level_evaluation_file)[indicator])
+
+                mean_list.append(round(method(detail_data[model.model_name]), 3))
+
+            summary_data_horizontal.append(mean_list)
+            summary_data_vertical[indicator] = mean_list
+
+            detail_result = DataFrame(detail_data, index=get_test_releases_list(), columns=column_names)
+
+            make_path(f'{output_path}Glance-{clf}/')
+            detail_result.to_csv(f'{output_path}Glance-{clf}/{method.__name__}-{indicator}.csv', index=True)
+
+        # '5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%',
+        # '55%', '60%', '65%', '70%', '75%', '80%', '85%', '90%', '95%', '100%',
+        threshold_indices = ['5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%', ]
+        summary_result = DataFrame(summary_data_horizontal, index=indicators, columns=threshold_indices)
+        summary_result.to_csv(f'{output_path}Dis1-summary-{method.__name__}-Glance-{clf}-horizontal.csv', index=True)
+        summary_result = DataFrame(summary_data_vertical, index=threshold_indices, columns=indicators)
+        summary_result.to_csv(f'{output_path}Dis1-summary-{method.__name__}-Glance-{clf}-vertical.csv', index=True)
 
 
 if __name__ == '__main__':
-    trans()
+    #
+    file_level_classifiers = ['MD', 'EA', 'LR']
+    for classifier in file_level_classifiers:
+        # search_parameter_Glance(classifier)
+        test_parameter(classifier)
+        pass
+    pass
@@ -8,76 +8,45 @@
 
 sys.path.append('C:/Users/gzq-712/Desktop/Git/CLDP/')
 from src.models.glance import *
-from statistics import *
 
 # Ignore warning information
 warnings.filterwarnings('ignore')
 
-line_level_thresholds = [.05, .10, .15, .20, .25, .30, .35, .40, .45, .50]
 indicators = ['recall', 'far', 'ce', 'd2h', 'mcc', 'ifa', 'recall_20', 'ratio']
 
 output_path = '../../result/RQ1/'
 make_path(output_path)
 
 
-def select_model(file_level_classifier, line_level_threshold, train='', test=''):
+def select_model(file_level_classifier, train='', test=''):
     if file_level_classifier == 'MD':
-        model = Glance_MD(train, test, line_threshold=line_level_threshold, test=True)
+        model = Glance_MD(train, test)
     elif file_level_classifier == 'EA':
-        model = Glance_EA(train, test, line_threshold=line_level_threshold, test=True)
+        model = Glance_EA(train, test)
     else:
-        model = Glance_LR(train, test, line_threshold=line_level_threshold, test=True)
+        model = Glance_LR(train, test)
     return model
 
 
-def search_parameter_Glance(clf):
-    for threshold in line_level_thresholds:
-        for project, releases in get_project_releases_dict().items():
-            for i in range(len(releases) - 1):
-                # 1. Loading data. train data index = i, test data index = i + 1
-                model = select_model(clf, threshold, releases[i], releases[i + 1])
-
-                print(f'========== {model.model_name} CR PREDICTION for {releases[i + 1]} =================='[:60])
-                model.file_level_prediction()
-                model.analyze_file_level_result()
-
-                model.line_level_prediction()
-                model.analyze_line_level_result()
-
-
 def test_parameter(clf):
-    print(f'Glance {clf}')
-    # 水平展示的变化数据, 列名为与之
-    summary_data_horizontal, summary_data_vertical = list(), dict()
-    for indicator in indicators:
-        detail_data, column_names, mean_list = dict(), list(), list()
-        for threshold in line_level_thresholds:
-            model = select_model(clf, threshold)
-            column_names.append(model.model_name)
-            detail_data[model.model_name] = list(pd.read_csv(model.line_level_evaluation_file)[indicator])
-
-            mean_list.append(round(mean(detail_data[model.model_name]), 3))
+    print(f'======================== Glance {clf} ===========================')
+    detail_data, column_names = list(), list()
+    model = select_model(clf)
+    data = pd.read_csv(model.line_level_evaluation_file)[indicators]
+    last = 0
+    for project, release in get_project_releases_dict().items():
+        start, end = last, last + len(release[1:])
+        detail_data.append(list(data.iloc[start:end].mean(axis=0)))
+        column_names.append(project)
+        last = end
 
-        summary_data_horizontal.append(mean_list)
-        summary_data_vertical[indicator] = mean_list
-
-        detail_result = DataFrame(detail_data, index=get_test_releases_list(), columns=column_names)
-
-        make_path(f'{output_path}RQ1-Glance-{clf}/')
-        detail_result.to_csv(f'{output_path}RQ1-Glance-{clf}/{indicator}.csv', index=True)
-
-    threshold_indices = ['5%', '10%', '15%', '20%', '25%', '30%', '35%', '40%', '45%', '50%', ]
-    summary_result = DataFrame(summary_data_horizontal, index=indicators, columns=threshold_indices)
-    summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-horizontal.csv', index=True)
-    summary_result = DataFrame(summary_data_vertical, index=threshold_indices, columns=indicators)
-    summary_result.to_csv(f'{output_path}RQ1-summary-Glance-{clf}-vertical.csv', index=True)
+    summary_result = DataFrame(detail_data, index=get_project_list(), columns=indicators)
+    summary_result.to_csv(f'{output_path}Glance-{clf}.csv', index=True)
 
 
 if __name__ == '__main__':
     #
     file_level_classifiers = ['MD', 'EA', 'LR']
     for classifier in file_level_classifiers:
-        # search_parameter_Glance(classifier)
         test_parameter(classifier)
-
     pass