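"""classification_RGB.py

Classify RGB feature vectors with several scikit-learn classifiers.

Loads train_RGB.csv and test_RGB.csv, pools them, encodes the labels,
mean-imputes missing values, standardizes the features, and evaluates
KNN, Random Forest, linear SVM, and LDA on a fresh 80:20 train-test split.
Accuracy is saved to results/rgb/resultsWithGLCMAndColor_RGB.csv, and
weighted precision, recall, and F1-score are printed for each classifier.
"""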
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)
from sklearn.impute import SimpleImputer
# Load CSV files
train_csv = "train_RGB.csv"
test_csv = "test_RGB.csv"
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)
# Combine all datasets
combined_df = pd.concat([train_df, test_df], ignore_index=True)
# Encode labels
label_encoder = LabelEncoder()
combined_df["Label"] = label_encoder.fit_transform(combined_df["Label"])
# Separate features and labels
X = combined_df.drop("Label", axis=1)
y = combined_df["Label"]
# Impute missing values (NaN) with mean of the column
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)
# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)
# Perform 80:20 train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
# Initialize classifiers
classifiers = {
    "KNN": KNeighborsClassifier(n_neighbors=4),
    "Random Forest": RandomForestClassifier(n_estimators=300, random_state=42),
    "SVM": SVC(kernel="linear", C=1.0, random_state=42),
    "LDA": LinearDiscriminantAnalysis(),
}
# Train and evaluate classifiers
results = {}
confusion_matrices = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    confusion_matrices[name] = confusion_matrix(y_test, y_pred)
    results[name] = accuracy
    print(f"{name} Accuracy: {accuracy}")
# Save accuracy results to a CSV file
results_df = pd.DataFrame.from_dict(results, orient="index", columns=["Accuracy"])
results_df.index.name = "Classifier"
results_csv = "results/rgb/resultsWithGLCMAndColor_RGB.csv"
os.makedirs(os.path.dirname(results_csv), exist_ok=True)  # ensure the output directory exists
results_df.to_csv(results_csv)
print(f"Results saved to {results_csv}")
# Compute weighted precision, recall, and F1-score for each classifier
precision = {}
recall = {}
f1 = {}
for name, clf in classifiers.items():
    y_pred = clf.predict(X_test)
    precision[name] = precision_score(y_test, y_pred, average="weighted")
    recall[name] = recall_score(y_test, y_pred, average="weighted")
    f1[name] = f1_score(y_test, y_pred, average="weighted")
# Print metrics
print("\nMetrics:")
for name in classifiers.keys():
    print(f"{name}:")
    print(f" Precision: {precision[name]}")
    print(f" Recall: {recall[name]}")
    print(f" F1-score: {f1[name]}")