-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathReal-World_Comp.py
69 lines (55 loc) · 2.18 KB
/
Real-World_Comp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
# ---------------------------------------------------------------------------
# Compare three depression-level distributions and plot them side by side:
#   * "Real-Life": label shares observed in the loaded dataset,
#   * "Predicted": label shares predicted by an RBF-kernel SVM on a held-out
#                  test split,
#   * "Realistic": a fixed, hard-coded reference distribution.
# ---------------------------------------------------------------------------

# Step 1: Load the dataset
data = pd.read_csv("Datasets/main.csv")  # Replace with the correct file path

# Step 2: Analyze real-life depression level percentages
# Shares are taken over every row of the dataset, not only the test split.
real_counts = data['depressionLevel'].value_counts()
real_percentages = (real_counts / len(data)) * 100

# Display the real-life percentages
print("Real-Life Depression Level Percentages:")
print(real_percentages)

# Step 3: Prepare the data for prediction
# NOTE(review): every column except the target is fed to the SVM as-is, so
# this presumably assumes all remaining columns are numeric -- confirm
# against the CSV.
x = data.drop(columns=['depressionLevel'])  # Features
y = data['depressionLevel']  # Target

# Encode categorical target variable
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split the dataset (60% train / 40% test, fixed seed for reproducibility)
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.4, random_state=25
)

# Train SVM Model
# Probability estimates are deliberately not enabled: nothing below calls
# predict_proba, and enabling them makes fit() run an additional internal
# cross-validation without affecting the output of predict().
svm_model = SVC(kernel='rbf')
svm_model.fit(x_train, y_train)

# Generate predictions
y_pred = svm_model.predict(x_test)

# Step 4: Analyze predicted percentages
# Decode the integer predictions back to their original labels first, so the
# resulting Series is indexed by label just like real_percentages.
predicted_labels = pd.Series(le.inverse_transform(y_pred))
predicted_percentages = predicted_labels.value_counts(normalize=True) * 100

# Step 5: Realistic Depression Level Percentages
realistic_percentages = {
    "Low": 40,
    "Mild": 30,
    "Moderate": 20,
    "High": 10,
}

# Combine data into a DataFrame for comparison
# Levels missing from a source are reported as 0. Labels in the data that are
# not listed in all_levels do not appear in the comparison at all.
all_levels = ['Low', 'Mild', 'Moderate', 'High']
comparison_df = pd.DataFrame({
    "Real-Life": [real_percentages.get(level, 0) for level in all_levels],
    "Predicted": [predicted_percentages.get(level, 0) for level in all_levels],
    "Realistic": [realistic_percentages.get(level, 0) for level in all_levels],
}, index=all_levels)

print("\nComparison of Distributions:")
print(comparison_df)

# Step 6: Plot comparison bar chart
# One colour per column of comparison_df, in column order.
comparison_df.plot(kind='bar', figsize=(10, 6), color=['lightblue', 'orange', 'green'])
plt.title("Comparison of Depression Level Distributions")
plt.ylabel("Percentage (%)")
plt.xlabel("Depression Levels")
plt.legend(title="Distribution Source")
plt.tight_layout()
plt.show()