Small code cleanup: remove debug prints, timing code and unused imports; report accuracy in the random forest example

Shihab-Shahriar · Shihab-Shahriar · commit 30e4c49f443c · 2020-05-29T18:32:01.000+06:00
diff --git a/examples/random_forest.py b/examples/random_forest.py
@@ -1,9 +1,7 @@
-from timeit import default_timer
-start = default_timer()
 import logging
 
 import numpy as np
-from sklearn.datasets import make_classification, load_boston, load_digits, load_breast_cancer, load_iris
+from sklearn.datasets import make_classification
 from sklearn.datasets import make_regression
 from sklearn.metrics import roc_auc_score, accuracy_score
 
@@ -23,16 +21,17 @@ def classification():
     X, y = make_classification(
         n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0
     )
-    #X,y = load_breast_cancer(return_X_y=True)   
 
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)
 
-    model = RandomForestClassifier(n_estimators=5, max_depth=4)
+    model = RandomForestClassifier(n_estimators=10, max_depth=4)
     model.fit(X_train, y_train)
-    predictions = model.predict(X_test)[:,1]
-    #predictions = np.argmax(model.predict(X_test),axis=1)
-    print(predictions.shape)
-    print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions))
+
+    predictions_prob = model.predict(X_test)[:, 1]
+    predictions = np.argmax(model.predict(X_test), axis=1)
+    #print(predictions.shape)
+    print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob))
+    print("classification, accuracy score: %s" % accuracy_score(y_test, predictions))
 
 
 def regression():
@@ -51,5 +50,3 @@ def regression():
 if __name__ == "__main__":
     classification()
     # regression()
-    end = default_timer()
-    print(end-start)
diff --git a/mla/ensemble/random_forest.py b/mla/ensemble/random_forest.py
@@ -80,14 +80,10 @@ def _predict(self, X=None):
         for i in range(X.shape[0]):
             row_pred = np.zeros(y_shape)
             for tree in self.trees:
-                tmp = tree.predict_row(X[i, :])
-                print(tmp,row_pred.shape,row_pred)
-                row_pred += tmp
-
+                row_pred += tree.predict_row(X[i, :])
 
             row_pred /= self.n_estimators
             predictions[i, :] = row_pred
-            print(f"i={i},{row_pred}\n")
         return predictions
 
 
diff --git a/mla/ensemble/tree.py b/mla/ensemble/tree.py
@@ -65,7 +65,7 @@ def _find_best_split(self, X, target, n_features):
         return max_col, max_val, max_gain
 
     def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None, 
-                minimum_gain=0.01, loss=None, n_classes = None):
+                minimum_gain=0.01, loss=None, n_classes=None):
         """Build a decision tree from training set.
 
         Parameters
@@ -85,7 +85,7 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
             Minimum gain required for splitting.
         loss : function, default None
             Loss function for gradient boosting.
-        n_classes : int, default None
+        n_classes : int or None
             No of unique labels in case of classification
         """
 
@@ -143,7 +143,6 @@ def _calculate_leaf_value(self, targets, n_classes):
                 self.outcome = np.mean(targets["y"])
             else:
                 # Probability for classification task
-                #self.outcome = stats.itemfreq(targets["y"])[:, 1] / float(targets["y"].shape[0])
                 self.outcome = np.bincount(targets["y"], minlength=n_classes) / targets["y"].shape[0]
 
     def predict_row(self, row):