-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrandomForest.py
36 lines (28 loc) · 986 Bytes
/
randomForest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 20 15:24:01 2018
@author: bking
"""
import pandas as pd
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
import pickle
#from sklearn.preprocessing import LabelEncoder
# Load the preprocessed train/test tables; the first CSV column is the index.
train_df = pd.read_csv("data/train_pre.csv", index_col=0)
test_df = pd.read_csv("data/test_pre.csv", index_col=0)

# Split the training table into the regression target and the features.
target_col = 'totals.transactionRevenue'
train_y = train_df[target_col]
train_x = train_df.drop(columns=[target_col])
# Random Forest regression with hyper-parameter tuning.
# Only the tree depth is searched; a wider grid (3, 6, 10, 15, 20, None)
# is not used here.
rf = RandomForestRegressor()
param = {'max_depth': [3, 10]}

# 2-fold cross-validated grid search, scored by the negated mean squared
# log error.
rf_cv = GridSearchCV(
    estimator=rf,
    param_grid=param,
    scoring='neg_mean_squared_log_error',
    cv=2,
    verbose=True,
)
rf_cv.fit(train_x, train_y)
# Save the fitted grid search object to disk.
filename = 'model/rf_cv.sav'
# A context manager guarantees the file is flushed and closed as soon as the
# dump finishes, rather than whenever the anonymous handle is collected.
with open(filename, 'wb') as model_file:
    pickle.dump(rf_cv, model_file)

# Keep a handle on the estimator the search selected.
best_model = rf_cv.best_estimator_