-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexport_model.py
44 lines (32 loc) · 1.36 KB
/
export_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, matthews_corrcoef
from utils import preprocess_dataset
import pandas as pd
import joblib
import os
def export_model(
    dataset_path=os.path.join('datasets', 'train', 'merged.csv'),
    model_path=os.path.join('model', 'model.pkl'),
):
    """Train an XGBoost classifier on a CSV dataset and save it with joblib.

    The CSV is loaded, passed through ``preprocess_dataset``, and split into
    a 90% training part and a 10% held-out part. The classifier is fitted on
    the training part only, evaluated on the held-out part (accuracy, MCC and
    confusion-matrix counts are printed), and then written to ``model_path``.

    Args:
        dataset_path: Path of the CSV file to train on. After preprocessing
            it must contain a ``label`` column; every other column is used
            as a feature.
        model_path: Destination file for the serialized classifier. Its
            parent directory is created when it does not exist yet.

    Returns:
        None. The results are the printed metrics and the file written to
        ``model_path``.
    """
    df = pd.read_csv(dataset_path)
    print(f"Dataset loaded with {len(df)} records.")

    # Preprocess Dataset
    df = preprocess_dataset(df)
    print("Dataset preprocessed successfully.")
    print(df.head())

    # Separate features and labels
    X = df.drop(columns=['label'])
    y = df['label']

    # Train/Test Split: no random_state is given, so the split (and therefore
    # the printed metrics) differs from run to run.
    train_data, test_data, train_label, test_label = train_test_split(
        X, y, test_size=0.1, shuffle=True
    )

    clf = XGBClassifier(n_estimators=210)
    clf.fit(train_data, train_label)
    predicted_labels = clf.predict(test_data)  # Predictions on the held-out split

    # Evaluate Model
    acc_score = accuracy_score(test_label, predicted_labels)
    mcc = matthews_corrcoef(test_label, predicted_labels)
    # The four-way unpacking only works for a 2x2 confusion matrix, i.e. when
    # exactly two classes appear in the held-out labels and predictions.
    tn, fp, fn, tp = confusion_matrix(test_label, predicted_labels).ravel()
    print(f"Accuracy {acc_score}, MCC {mcc}, TN: {tn} FP: {fp} FN: {fn} TP: {tp}")

    # Create the output directory first so the dump does not fail on a fresh
    # checkout; a bare filename has no directory component to create.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(clf, model_path)
# Script entry point: train and export the model with the default paths when
# this file is executed directly rather than imported.
if __name__ == "__main__":
    export_model()