-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassificationConfusion.py
96 lines (81 loc) · 3.26 KB
/
classificationConfusion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
# Load CSV files
train_csv = "train.csv"
test_csv = "test.csv"
val_csv = "val.csv"
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
val_df = pd.read_csv(val_csv)

# Combine all three datasets; the pooled rows are re-split 80:20 below.
combined_df = pd.concat([train_df, test_df, val_df], ignore_index=True)

# Encode the string/class labels in the "Label" column as integer codes.
label_encoder = LabelEncoder()
combined_df["Label"] = label_encoder.fit_transform(combined_df["Label"])

# Separate features and labels
X = combined_df.drop("Label", axis=1)
y = combined_df["Label"]

# Perform the 80:20 train-test split BEFORE fitting any preprocessing step.
# Fitting the imputer/scaler on the full dataset would let statistics of the
# held-out rows (column means, standard deviations) leak into training and
# bias the reported scores. The split itself is unchanged: it depends only on
# the number of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=37
)

# Impute missing values (NaN) with the column mean learned from the training
# rows only, then apply the same fill values to the test rows.
imputer = SimpleImputer(strategy="mean")
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Scale the features using mean/variance learned from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Initialize classifiers (comment entries in or out to choose what to run).
classifiers = {
    # "KNN": KNeighborsClassifier(n_neighbors=4),
    # "Random Forest": RandomForestClassifier(n_estimators=300, random_state=42),
    "SVM": SVC(kernel="linear", C=1.0, random_state=42),
    # "LDA": LinearDiscriminantAnalysis(),
}

# Train and evaluate classifiers. Every dict below is keyed by classifier name.
results = {}
precision = {}
recall = {}
f1 = {}
confusion_matrices = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    # "weighted" averages the per-class scores weighted by class support.
    precision[name] = precision_score(y_test, y_pred, average="weighted")
    recall[name] = recall_score(y_test, y_pred, average="weighted")
    f1[name] = f1_score(y_test, y_pred, average="weighted")
    confusion_matrices[name] = confusion_matrix(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy}")

# Save results to a DataFrame (one row per classifier, accuracy only).
results_df = pd.DataFrame.from_dict(results, orient="index", columns=["Accuracy"])
results_df.index.name = "Classifier"
results_csv = "results/thermal/resultsWithGLCMAndColor.csv"
# Create the output directory up front so a missing folder does not discard
# the finished training run with an OSError at write time.
Path(results_csv).parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(results_csv)
print(f"Results saved to {results_csv}")
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Draw one confusion matrix as an annotated heatmap and display it.

    Args:
        cm: The confusion matrix to draw. Cell annotations use the integer
            format ``'d'``, so the entries are expected to be integer counts.
        title: Text shown above the heatmap.

    The function opens a new 8x6 inch figure, raises seaborn's global font
    scale to 1.5, and blocks on ``plt.show()``; it returns nothing.
    """
    plt.figure(figsize=(8, 6))
    # Enlarge seaborn's fonts; this is applied after the figure exists, as in
    # the original call order.
    sns.set(font_scale=1.5)

    # Large numbers inside each cell.
    cell_text_style = {"size": 23}
    axes = sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        cbar=False,
        annot_kws=cell_text_style,
    )

    # The heatmap is drawn on the current axes, so label that axes directly.
    axes.set_title(title, fontsize=22)
    axes.set_xlabel('Predicted Label', fontsize=20)
    axes.set_ylabel('True Label', fontsize=20)
    plt.show()
# Plot the confusion matrix and print precision, recall, and F1-score for
# every classifier that was trained. Looking the results up by the loop key
# instead of a hard-coded "SVM" avoids a KeyError when the entries in
# `classifiers` are toggled; with only SVM enabled the output is unchanged.
for name, cm in confusion_matrices.items():
    plot_confusion_matrix(cm, title=f"{name} Confusion Matrix for thermal images")
    print(f"\nMetrics for {name}:")
    print(f" Precision: {precision[name]}")
    print(f" Recall: {recall[name]}")
    print(f" F1-score: {f1[name]}")