@@ -239,7 +239,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
239
239
# Describe top features
240
240
n_opt = rfecv .named_steps .est .n_features_
241
241
importance = _extract_important_features (
242
- rfecv .named_steps .dv .get_feature_names (),
242
+ rfecv .named_steps .dv .get_feature_names_out (),
243
243
rfecv .named_steps .est .ranking_ )
244
244
importance = sort_importances (importance , ascending = True )[:n_opt ]
245
245
@@ -249,16 +249,17 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
249
249
250
250
251
251
def _extract_rfe_scores(rfecv):
    """Return the mean cross-validation scores indexed by feature count.

    Parameters
    ----------
    rfecv : fitted RFECV-like estimator
        Must expose ``cv_results_['mean_test_score']``, ``ranking_`` and
        ``step``. The object is only read; it is never modified.

    Returns
    -------
    pd.Series
        Series named ``'Accuracy'`` holding one mean test score per
        elimination round, indexed by the number of features evaluated in
        that round (ascending).
    """
    grid_scores_ = rfecv.cv_results_['mean_test_score']
    n_features = len(rfecv.ranking_)

    # If using fractional step, step = integer of fraction * n_features.
    # Resolve it into a local instead of writing back to ``rfecv.step`` so
    # the caller's estimator keeps its configured value, and floor it at one
    # feature: ``int()`` alone yields 0 for small fractions, which would
    # collapse every x value onto ``n_features``.
    step = rfecv.step
    if step < 1:
        step = max(1, int(step * n_features))

    # Need to manually calculate x-axis, grid_scores_ is a 1-d array. Walk
    # backwards from the full feature set, one step per score.
    n_scores = len(grid_scores_)
    x = [n_features - (n * step) for n in range(n_scores - 1, -1, -1)]

    # The final elimination round can overshoot below one feature; clamp it.
    # The emptiness guard avoids an IndexError when there are no scores.
    if x and x[0] < 1:
        x[0] = 1
    return pd.Series(grid_scores_, index=x, name='Accuracy')
262
263
263
264
264
265
def nested_cross_validation (table , metadata , cv , random_state , n_jobs ,
@@ -411,12 +412,12 @@ def _calculate_feature_importances(estimator):
411
412
# feature_importances_ or coef_ to report feature importance/weights
412
413
try :
413
414
importances = _extract_important_features (
414
- estimator .named_steps .dv .get_feature_names (),
415
+ estimator .named_steps .dv .get_feature_names_out (),
415
416
estimator .named_steps .est .feature_importances_ )
416
417
# is there a better way to determine whether estimator has coef_ ?
417
418
except AttributeError :
418
419
importances = _extract_important_features (
419
- estimator .named_steps .dv .get_feature_names (),
420
+ estimator .named_steps .dv .get_feature_names_out (),
420
421
estimator .named_steps .est .coef_ )
421
422
return importances
422
423
@@ -718,7 +719,7 @@ def _mean_feature_importance(importances):
718
719
def _null_feature_importance(table):
    """Build a placeholder importance table giving every feature weight 1.

    A ``DictVectorizer`` is fitted on ``table`` solely to discover the
    feature names; the result is a DataFrame indexed by those names (index
    named "feature") with a single "importance" column set to 1.
    """
    vectorizer = DictVectorizer().fit(table)
    feature_ids = pd.Index(vectorizer.get_feature_names_out(), name="feature")
    null_importances = pd.DataFrame(index=feature_ids)
    null_importances["importance"] = 1
    return null_importances
@@ -827,8 +828,9 @@ def _train_adaboost_base_estimator(table, metadata, column, base_estimator,
827
828
828
829
return Pipeline (
829
830
[('dv' , estimator .named_steps .dv ),
830
- ('est' , adaboost_estimator (estimator .named_steps .est ,
831
- n_estimators , random_state = random_state ))])
831
+ ('est' , adaboost_estimator (estimator = estimator .named_steps .est ,
832
+ n_estimators = n_estimators ,
833
+ random_state = random_state ))])
832
834
833
835
834
836
def _disable_feature_selection (estimator , optimize_feature_selection ):
0 commit comments