Skip to content

Commit 30e4c49

Browse files
small code cleanup
1 parent 0b9ebf3 commit 30e4c49

File tree

3 files changed

+11
-19
lines changed

3 files changed

+11
-19
lines changed

examples/random_forest.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
from timeit import default_timer
2-
start = default_timer()
31
import logging
42

53
import numpy as np
6-
from sklearn.datasets import make_classification, load_boston, load_digits, load_breast_cancer, load_iris
4+
from sklearn.datasets import make_classification
75
from sklearn.datasets import make_regression
86
from sklearn.metrics import roc_auc_score, accuracy_score
97

@@ -23,16 +21,17 @@ def classification():
2321
X, y = make_classification(
2422
n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0
2523
)
26-
#X,y = load_breast_cancer(return_X_y=True)
2724

2825
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)
2926

30-
model = RandomForestClassifier(n_estimators=5, max_depth=4)
27+
model = RandomForestClassifier(n_estimators=10, max_depth=4)
3128
model.fit(X_train, y_train)
32-
predictions = model.predict(X_test)[:,1]
33-
#predictions = np.argmax(model.predict(X_test),axis=1)
34-
print(predictions.shape)
35-
print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions))
29+
30+
predictions_prob = model.predict(X_test)[:, 1]
31+
predictions = np.argmax(model.predict(X_test), axis=1)
32+
#print(predictions.shape)
33+
print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob))
34+
print("classification, accuracy score: %s" % accuracy_score(y_test, predictions))
3635

3736

3837
def regression():
@@ -51,5 +50,3 @@ def regression():
5150
if __name__ == "__main__":
5251
classification()
5352
# regression()
54-
end = default_timer()
55-
print(end-start)

mla/ensemble/random_forest.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,10 @@ def _predict(self, X=None):
8080
for i in range(X.shape[0]):
8181
row_pred = np.zeros(y_shape)
8282
for tree in self.trees:
83-
tmp = tree.predict_row(X[i, :])
84-
print(tmp,row_pred.shape,row_pred)
85-
row_pred += tmp
86-
83+
row_pred += tree.predict_row(X[i, :])
8784

8885
row_pred /= self.n_estimators
8986
predictions[i, :] = row_pred
90-
print(f"i={i},{row_pred}\n")
9187
return predictions
9288

9389

mla/ensemble/tree.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def _find_best_split(self, X, target, n_features):
6565
return max_col, max_val, max_gain
6666

6767
def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None,
68-
minimum_gain=0.01, loss=None, n_classes = None):
68+
minimum_gain=0.01, loss=None, n_classes=None):
6969
"""Build a decision tree from training set.
7070
7171
Parameters
@@ -85,7 +85,7 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
8585
Minimum gain required for splitting.
8686
loss : function, default None
8787
Loss function for gradient boosting.
88-
n_classes : int, default None
88+
n_classes : int or None
8989
No of unique labels in case of classification
9090
"""
9191

@@ -143,7 +143,6 @@ def _calculate_leaf_value(self, targets, n_classes):
143143
self.outcome = np.mean(targets["y"])
144144
else:
145145
# Probability for classification task
146-
#self.outcome = stats.itemfreq(targets["y"])[:, 1] / float(targets["y"].shape[0])
147146
self.outcome = np.bincount(targets["y"], minlength=n_classes) / targets["y"].shape[0]
148147

149148
def predict_row(self, row):

0 commit comments

Comments
 (0)