@@ -64,7 +64,8 @@ def _find_best_split(self, X, target, n_features):
64
64
max_col , max_val , max_gain = column , value , gain
65
65
return max_col , max_val , max_gain
66
66
67
- def train (self , X , target , max_features = None , min_samples_split = 10 , max_depth = None , minimum_gain = 0.01 , loss = None ):
67
+ def train (self , X , target , max_features = None , min_samples_split = 10 , max_depth = None ,
68
+ minimum_gain = 0.01 , loss = None , n_classes = None ):
68
69
"""Build a decision tree from training set.
69
70
70
71
Parameters
@@ -84,6 +85,8 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
84
85
Minimum gain required for splitting.
85
86
loss : function, default None
86
87
Loss function for gradient boosting.
88
+ n_classes : int, default None
89
+ Number of unique class labels in case of classification.
87
90
"""
88
91
89
92
if not isinstance (target , dict ):
@@ -118,17 +121,17 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
118
121
# Grow left and right child
119
122
self .left_child = Tree (self .regression , self .criterion )
120
123
self .left_child .train (
121
- left_X , left_target , max_features , min_samples_split , max_depth - 1 , minimum_gain , loss
124
+ left_X , left_target , max_features , min_samples_split , max_depth - 1 , minimum_gain , loss , n_classes
122
125
)
123
126
124
127
self .right_child = Tree (self .regression , self .criterion )
125
128
self .right_child .train (
126
- right_X , right_target , max_features , min_samples_split , max_depth - 1 , minimum_gain , loss
129
+ right_X , right_target , max_features , min_samples_split , max_depth - 1 , minimum_gain , loss , n_classes
127
130
)
128
131
except AssertionError :
129
- self ._calculate_leaf_value (target )
132
+ self ._calculate_leaf_value (target , n_classes )
130
133
131
- def _calculate_leaf_value (self , targets ):
134
+ def _calculate_leaf_value (self , targets , n_classes ):
132
135
"""Find optimal value for leaf."""
133
136
if self .loss is not None :
134
137
# Gradient boosting
@@ -140,7 +143,8 @@ def _calculate_leaf_value(self, targets):
140
143
self .outcome = np .mean (targets ["y" ])
141
144
else :
142
145
# Probability for classification task
143
- self .outcome = stats .itemfreq (targets ["y" ])[:, 1 ] / float (targets ["y" ].shape [0 ])
146
+ # stats.itemfreq was deprecated and removed from SciPy; np.bincount below is the replacement.
147
+ self .outcome = np .bincount (targets ["y" ], minlength = n_classes ) / targets ["y" ].shape [0 ]
144
148
145
149
def predict_row (self , row ):
146
150
"""Predict single row."""
0 commit comments