
Commit 67f3cc5

MAINT: updates for Python 3.9 (#233)
1 parent 54eed98 commit 67f3cc5

File tree

6 files changed: +24 -22 lines changed

q2_sample_classifier/classify.py

+1 -1

@@ -83,7 +83,7 @@ def metatable(ctx,
             raise ValueError('Missing samples in metadata: %r' %
                              table_ids.difference(metadata_ids))
         else:
-            metadata = metadata.loc[sample_ids]
+            metadata = metadata.loc[list(sample_ids)]
             if len(sample_ids) < len(table_ids):
                 tab = tab.filter(
                     ids_to_keep=sample_ids, axis='sample', inplace=False)
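
Background for this hunk: modern pandas (1.0+) rejects set objects as .loc indexers, since sets have no defined order; converting to a list is the supported form. A minimal sketch of the failure mode, with illustrative names not taken from the commit:

import pandas as pd

metadata = pd.DataFrame({'group': ['a', 'b', 'c']}, index=['s1', 's2', 's3'])
sample_ids = {'s1', 's3'}  # e.g. the intersection of table and metadata IDs

# metadata.loc[sample_ids]               # TypeError: sets are not valid indexers
subset = metadata.loc[list(sample_ids)]  # list indexing works on modern pandas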

q2_sample_classifier/tests/test_actions.py

+1 -1

@@ -59,7 +59,7 @@ def test_action_split_table(self):
         self.assertEqual(y_train.name, 'bugs')

         # test if complete target column is covered
-        y_all = y_train.append(y_test).sort_index()
+        y_all = pd.concat([y_train, y_test]).sort_index()
         y_all.index.name = 'SampleID'
         pdt.assert_series_equal(y_all, self.md._series)
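
This hunk tracks the pandas 1.4 deprecation (removed in 2.0) of Series.append; pd.concat is the documented replacement. A small illustration with made-up data:

import pandas as pd

y_train = pd.Series([0, 1], index=['s1', 's2'], name='bugs')
y_test = pd.Series([1], index=['s3'], name='bugs')

# y_all = y_train.append(y_test)                   # removed in pandas 2.0
y_all = pd.concat([y_train, y_test]).sort_index()  # equivalent, still supported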

q2_sample_classifier/tests/test_estimators.py

+5 -5

@@ -117,7 +117,7 @@ def _load_cmc(md_fp, column):
             index_col=0, names=['feature', 'importance'])
         self.exp_pred = pd.read_csv(
             self.get_data_path('predictions.tsv'), sep='\t', header=0,
-            index_col=0, squeeze=True)
+            index_col=0).squeeze('columns')
         index = pd.Index(['A', 'B', 'C', 'D'], name='id')
         self.table_percnorm = qiime2.Artifact.import_data(
             FeatureTable[PercentileNormalized], pd.DataFrame(
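
read_csv(..., squeeze=True) was deprecated in pandas 1.4 and removed in 2.0; calling .squeeze('columns') on the resulting one-column DataFrame yields the same Series. A self-contained sketch (the inline TSV is illustrative):

import io
import pandas as pd

tsv = io.StringIO('id\tprediction\nA\t0.1\nB\t0.9\n')

# pd.read_csv(tsv, sep='\t', index_col=0, squeeze=True)             # no longer accepted
pred = pd.read_csv(tsv, sep='\t', index_col=0).squeeze('columns')   # returns a Series
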
@@ -135,7 +135,7 @@ def test_extract_features(self):
         dv = DictVectorizer()
         dv.fit(dicts)
         features = table.ids('observation')
-        self.assertEqual(set(dv.get_feature_names()), set(features))
+        self.assertEqual(set(dv.get_feature_names_out()), set(features))
         self.assertEqual(len(dicts), len(table.ids()))
         for dict_row, (table_row, _, _) in zip(dicts, table.iter()):
             for feature, count in zip(features, table_row):
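
get_feature_names was deprecated across scikit-learn transformers in 1.0 and removed in 1.2; get_feature_names_out is the replacement and returns an array of names. For example:

from sklearn.feature_extraction import DictVectorizer

dv = DictVectorizer()
dv.fit([{'featA': 1.0, 'featB': 2.0}])

# dv.get_feature_names()             # removed in scikit-learn 1.2
names = dv.get_feature_names_out()   # array(['featA', 'featB'], dtype=object)
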
@@ -398,7 +398,7 @@ def test_train_adaboost_decision_tree(self):
             parameter_tuning=True, classification=True,
             missing_samples='ignore', base_estimator="DecisionTree")
         self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
-        self.assertEqual(type(abe.named_steps.est.base_estimator),
+        self.assertEqual(type(abe.named_steps.est.estimator),
                          DecisionTreeClassifier)

     def test_train_adaboost_extra_trees(self):
@@ -408,7 +408,7 @@ def test_train_adaboost_extra_trees(self):
             parameter_tuning=True, classification=True,
             missing_samples='ignore', base_estimator="ExtraTrees")
         self.assertEqual(type(abe.named_steps.est), AdaBoostClassifier)
-        self.assertEqual(type(abe.named_steps.est.base_estimator),
+        self.assertEqual(type(abe.named_steps.est.estimator),
                          ExtraTreeClassifier)

     # test some invalid inputs/edge cases
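
Both AdaBoost hunks track scikit-learn's rename of base_estimator to estimator (deprecated in 1.2, removed in 1.4). A sketch of the attribute access the assertions rely on:

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

abe = AdaBoostClassifier(estimator=DecisionTreeClassifier())

# abe.base_estimator        # gone in recent scikit-learn releases
print(type(abe.estimator))  # <class 'sklearn.tree._classes.DecisionTreeClassifier'>
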
@@ -504,7 +504,7 @@ def test_predict_classifications(self):
         ls_pred_classes = prob.columns.tolist()
         ls_correct_range = [col for col in ls_pred_classes if
                             prob[col].between(
-                                0, 1, inclusive=True).all()]
+                                0, 1, inclusive="both").all()]
         self.assertEqual(len(ls_correct_range), prob.shape[1],
                          msg='Predicted probabilities of class {}'
                              'are not in range [0,1]'.format(
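
Series.between stopped accepting a boolean for inclusive (deprecated in pandas 1.3, removed in 2.0); it now takes 'both', 'neither', 'left', or 'right'. For instance:

import pandas as pd

prob = pd.Series([0.0, 0.4, 1.0])

# prob.between(0, 1, inclusive=True)             # boolean form removed in pandas 2.0
ok = prob.between(0, 1, inclusive='both').all()  # True: both endpoints are included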

q2_sample_classifier/tests/test_types_formats_transformers.py

+3 -3

@@ -85,7 +85,7 @@ def test_pd_series_to_boolean_format(self):
                         name='outlier', index=exp_index)
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         self.assertEqual(sorted(exp), sorted(obs))

     def test_boolean_format_to_pd_series(self):

@@ -152,7 +152,7 @@ def test_pd_series_to_Predictions_format(self):
                         name='prediction', index=['a', 'b', 'c', 'd'])
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         pdt.assert_series_equal(obs, exp)

     def test_pd_series_to_Predictions_format_allow_nans(self):

@@ -161,7 +161,7 @@ def test_pd_series_to_Predictions_format_allow_nans(self):
                         name='prediction', index=['a', 'b', 'c', 'd'])
         obs = transformer(exp)
         obs = pd.read_csv(str(obs), sep='\t', header=0, index_col=0,
-                          squeeze=True)
+                          ).squeeze('columns')
         pdt.assert_series_equal(obs, exp)

     def test_Predictions_format_to_pd_series(self):

q2_sample_classifier/utilities.py

+11 -9

@@ -239,7 +239,7 @@ def _rfecv_feature_selection(feature_data, targets, estimator,
     # Describe top features
     n_opt = rfecv.named_steps.est.n_features_
     importance = _extract_important_features(
-        rfecv.named_steps.dv.get_feature_names(),
+        rfecv.named_steps.dv.get_feature_names_out(),
         rfecv.named_steps.est.ranking_)
     importance = sort_importances(importance, ascending=True)[:n_opt]
@@ -249,16 +249,17 @@


 def _extract_rfe_scores(rfecv):
+    grid_scores_ = rfecv.cv_results_['mean_test_score']
     n_features = len(rfecv.ranking_)
     # If using fractional step, step = integer of fraction * n_features
     if rfecv.step < 1:
         rfecv.step = int(rfecv.step * n_features)
-    # Need to manually calculate x-axis, as rfecv.grid_scores_ are a 1-d array
+    # Need to manually calculate x-axis, grid_scores_ is a 1-d array
     x = [n_features - (n * rfecv.step)
-         for n in range(len(rfecv.grid_scores_)-1, -1, -1)]
+         for n in range(len(grid_scores_)-1, -1, -1)]
     if x[0] < 1:
         x[0] = 1
-    return pd.Series(rfecv.grid_scores_, index=x, name='Accuracy')
+    return pd.Series(grid_scores_, index=x, name='Accuracy')


 def nested_cross_validation(table, metadata, cv, random_state, n_jobs,
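
RFECV.grid_scores_ was deprecated in scikit-learn 1.0 and removed in 1.2; the mean cross-validation score per feature count now lives in cv_results_. A runnable sketch with toy data:

from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=60, n_features=8, random_state=0)
rfecv = RFECV(LogisticRegression(max_iter=1000), cv=3).fit(X, y)

# rfecv.grid_scores_                           # removed in scikit-learn 1.2
scores = rfecv.cv_results_['mean_test_score']  # one mean score per feature count
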
@@ -411,12 +412,12 @@ def _calculate_feature_importances(estimator):
     # feature_importances_ or coef_ to report feature importance/weights
     try:
         importances = _extract_important_features(
-            estimator.named_steps.dv.get_feature_names(),
+            estimator.named_steps.dv.get_feature_names_out(),
             estimator.named_steps.est.feature_importances_)
     # is there a better way to determine whether estimator has coef_ ?
     except AttributeError:
         importances = _extract_important_features(
-            estimator.named_steps.dv.get_feature_names(),
+            estimator.named_steps.dv.get_feature_names_out(),
             estimator.named_steps.est.coef_)
     return importances

@@ -718,7 +719,7 @@ def _mean_feature_importance(importances):
 def _null_feature_importance(table):
     feature_extractor = DictVectorizer()
     feature_extractor.fit(table)
-    imp = pd.DataFrame(index=feature_extractor.get_feature_names())
+    imp = pd.DataFrame(index=feature_extractor.get_feature_names_out())
     imp.index.name = "feature"
     imp["importance"] = 1
     return imp
@@ -827,8 +828,9 @@ def _train_adaboost_base_estimator(table, metadata, column, base_estimator,

     return Pipeline(
         [('dv', estimator.named_steps.dv),
-         ('est', adaboost_estimator(estimator.named_steps.est,
-                                    n_estimators, random_state=random_state))])
+         ('est', adaboost_estimator(estimator=estimator.named_steps.est,
+                                    n_estimators=n_estimators,
+                                    random_state=random_state))])


 def _disable_feature_selection(estimator, optimize_feature_selection):
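
Passing the arguments by keyword here follows the same base_estimator to estimator rename in the AdaBoost constructors, and keeps the call unambiguous as positional parameters shift between releases. A sketch of the constructed estimator, standalone rather than inside the Pipeline above:

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

est = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=3),
                         n_estimators=100,
                         random_state=123)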

q2_sample_classifier/visuals.py

+3 -3

@@ -167,9 +167,9 @@ def _plot_confusion_matrix(y_test, y_pred, classes, normalize, palette,
     predictions.loc["Overall Accuracy"] = ""
     predictions.loc["Baseline Accuracy"] = ""
     predictions.loc["Accuracy Ratio"] = ""
-    predictions.loc["Overall Accuracy"]["Overall Accuracy"] = accuracy
-    predictions.loc["Baseline Accuracy"]["Overall Accuracy"] = basline_accuracy
-    predictions.loc["Accuracy Ratio"]["Overall Accuracy"] = accuracy_ratio
+    predictions.loc["Overall Accuracy", "Overall Accuracy"] = accuracy
+    predictions.loc["Baseline Accuracy", "Overall Accuracy"] = basline_accuracy
+    predictions.loc["Accuracy Ratio", "Overall Accuracy"] = accuracy_ratio

     return predictions, confusion
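
The .loc[row][col] = value form is chained indexing: the second [] can operate on a temporary copy, which raises SettingWithCopyWarning and may silently drop the write under pandas copy-on-write. A single .loc[row, col] call writes in place:

import pandas as pd

df = pd.DataFrame({'x': [1, 2]}, index=['a', 'b'])

# df.loc['a']['x'] = 99   # chained indexing: may modify a temporary copy
df.loc['a', 'x'] = 99     # one .loc call: guaranteed to write to df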
