14
14
{fitted} - If the automl case has been fitted
15
15
{cv, holdout} - Whether explicitly cv or holdout was used
16
16
{no_ensemble} - Fit with no ensemble size
17
- {cached} - If the resulting case is then cached
18
17
{multiobjective} - If the automl instance is multiobjective
19
18
"""
20
19
from __future__ import annotations
24
23
from pathlib import Path
25
24
26
25
import numpy as np
26
+ import sklearn .model_selection
27
27
28
28
import autosklearn .metrics
29
29
from autosklearn .automl import AutoMLClassifier , AutoMLRegressor
30
30
from autosklearn .automl_common .common .utils .backend import Backend
31
+ from autosklearn .evaluation .abstract_evaluator import (
32
+ MyDummyClassifier ,
33
+ MyDummyRegressor ,
34
+ )
31
35
32
36
from pytest_cases import case , parametrize
33
37
34
38
from test .fixtures .backend import copy_backend
35
39
from test .fixtures .caching import Cache
36
40
37
41
42
def stop_at_first(smbo, run_info, result, time_left) -> bool:
    """Trials callback that unconditionally returns ``False``.

    The "only dummy" cases in this module pass it as ``get_trials_callback``
    so that the dummy model is the only valid model.

    NOTE(review): presumably a ``False`` return asks the optimizer to stop
    after the first reported trial -- confirm against the callback contract.

    All four arguments are accepted to match the callback signature and are
    otherwise ignored.
    """
    keep_searching = False
    return keep_searching
45
+
46
+
38
47
@case (tags = ["classifier" ])
39
48
def case_classifier (
40
49
tmp_dir : str ,
@@ -60,7 +69,7 @@ def case_regressor(
60
69
# ###################################
61
70
# The following are fitted and cached
62
71
# ###################################
63
- @case (tags = ["classifier" , "fitted" , "holdout" , "cached" ])
72
+ @case (tags = ["classifier" , "fitted" , "holdout" ])
64
73
@parametrize ("dataset" , ["iris" ])
65
74
def case_classifier_fitted_holdout_iterative (
66
75
dataset : str ,
@@ -97,7 +106,7 @@ def case_classifier_fitted_holdout_iterative(
97
106
return model
98
107
99
108
100
- @case (tags = ["classifier" , "fitted" , "cv" , "cached" ])
109
+ @case (tags = ["classifier" , "fitted" , "cv" ])
101
110
@parametrize ("dataset" , ["iris" ])
102
111
def case_classifier_fitted_cv (
103
112
make_cache : Callable [[str ], Cache ],
@@ -134,7 +143,7 @@ def case_classifier_fitted_cv(
134
143
return model
135
144
136
145
137
- @case (tags = ["classifier" , "fitted" , "holdout" , "cached" , " multiobjective" ])
146
+ @case (tags = ["classifier" , "fitted" , "holdout" , "multiobjective" ])
138
147
@parametrize ("dataset" , ["iris" ])
139
148
def case_classifier_fitted_holdout_multiobjective (
140
149
dataset : str ,
@@ -177,7 +186,7 @@ def case_classifier_fitted_holdout_multiobjective(
177
186
return model
178
187
179
188
180
- @case (tags = ["regressor" , "fitted" , "holdout" , "cached" ])
189
+ @case (tags = ["regressor" , "fitted" , "holdout" ])
181
190
@parametrize ("dataset" , ["boston" ])
182
191
def case_regressor_fitted_holdout (
183
192
make_cache : Callable [[str ], Cache ],
@@ -212,7 +221,7 @@ def case_regressor_fitted_holdout(
212
221
return model
213
222
214
223
215
- @case (tags = ["regressor" , "fitted" , "cv" , "cached" ])
224
+ @case (tags = ["regressor" , "fitted" , "cv" ])
216
225
@parametrize ("dataset" , ["boston" ])
217
226
def case_regressor_fitted_cv (
218
227
make_cache : Callable [[str ], Cache ],
@@ -249,7 +258,7 @@ def case_regressor_fitted_cv(
249
258
return model
250
259
251
260
252
- @case (tags = ["classifier" , "fitted" , "no_ensemble" , "cached" ])
261
+ @case (tags = ["classifier" , "fitted" , "no_ensemble" ])
253
262
@parametrize ("dataset" , ["iris" ])
254
263
def case_classifier_fitted_no_ensemble (
255
264
make_cache : Callable [[str ], Cache ],
@@ -258,8 +267,7 @@ def case_classifier_fitted_no_ensemble(
258
267
make_automl_classifier : Callable [..., AutoMLClassifier ],
259
268
make_sklearn_dataset : Callable [..., Tuple [np .ndarray , ...]],
260
269
) -> AutoMLClassifier :
261
- """Case of a fitted classifier but ensemble was disabled by
262
- not writing models to disk"""
270
+ """Case of a fitted classifier but ensemble was disabled"""
263
271
key = f"case_classifier_fitted_no_ensemble_{ dataset } "
264
272
265
273
# This locks the cache for this item while we check, required for pytest-xdist
@@ -270,7 +278,6 @@ def case_classifier_fitted_no_ensemble(
270
278
temporary_directory = cache .path ("backend" ),
271
279
delete_tmp_folder_after_terminate = False ,
272
280
ensemble_class = None ,
273
- disable_evaluator_output = True ,
274
281
)
275
282
276
283
X , y , Xt , yt = make_sklearn_dataset (name = dataset )
@@ -282,3 +289,85 @@ def case_classifier_fitted_no_ensemble(
282
289
model ._backend = copy_backend (old = model ._backend , new = make_backend ())
283
290
284
291
return model
292
+
293
+
294
@case(tags=["classifier", "fitted"])
def case_classifier_fitted_only_dummy(
    make_cache: Callable[[str], Cache],
    make_backend: Callable[..., Backend],
    make_automl_classifier: Callable[..., AutoMLClassifier],
) -> AutoMLClassifier:
    """Case of a fitted classifier for which only the dummy model was found.

    The classifier is fitted on random data with ``stop_at_first`` as the
    trials callback, then checked to contain exactly one model that is a
    ``MyDummyClassifier``. The fitted model is stored in the cache under the
    name ``"model"`` and later invocations load it from there.

    Parameters
    ----------
    make_cache:
        Factory returning a ``Cache`` context manager for the given key.
    make_backend:
        Factory producing a fresh ``Backend`` to copy the cached one into.
    make_automl_classifier:
        Factory building the ``AutoMLClassifier`` to fit.

    Returns
    -------
    AutoMLClassifier
        The cached, fitted model with its backend copied to a new location.

    Raises
    ------
    ValueError
        If the fitted model does not consist of exactly one dummy classifier.
    """
    key = "case_classifier_fitted_only_dummy"

    # This locks the cache for this item while we check, required for pytest-xdist
    with make_cache(key) as cache:
        if "model" not in cache:
            model = make_automl_classifier(
                temporary_directory=cache.path("backend"),
                delete_tmp_folder_after_terminate=False,
                include={"classifier": ["bernoulli_nb"]},  # Just a meh model
                get_trials_callback=stop_at_first,
            )

            # Pure noise: features and labels are drawn independently
            rand = np.random.RandomState(2)
            _X = rand.random((100, 50))
            _y = rand.randint(0, 2, (100,))

            # Only the training split is needed to fit the model
            X, _, y, _ = sklearn.model_selection.train_test_split(
                _X, _y, random_state=1  # Required to ensure dummy is best
            )
            model.fit(X, y, dataset_name="random")

            # We now validate that indeed, the only model is the Dummy.
            # `or` (not `and`): fail if there is not exactly one member OR if
            # that single member is not the dummy. The length check comes
            # first so an empty list never reaches `members[0]`.
            members = list(model.models_.values())
            if len(members) != 1 or not isinstance(members[0], MyDummyClassifier):
                raise ValueError(f"Should only have one model, dummy\n{members}")

            cache.save(model, "model")

        # Always hand out the cached copy, with a backend of its own
        model = cache.load("model")
        model._backend = copy_backend(old=model._backend, new=make_backend())

    return model
332
+
333
+
334
@case(tags=["regressor", "fitted"])
def case_regressor_fitted_only_dummy(
    make_cache: Callable[[str], Cache],
    make_backend: Callable[..., Backend],
    make_automl_regressor: Callable[..., AutoMLRegressor],
) -> AutoMLRegressor:
    """Case of a fitted regressor for which only the dummy model was found.

    The regressor is fitted on random data with ``stop_at_first`` as the
    trials callback, then checked to contain exactly one model that is a
    ``MyDummyRegressor``. The fitted model is stored in the cache under the
    name ``"model"`` and later invocations load it from there.

    Parameters
    ----------
    make_cache:
        Factory returning a ``Cache`` context manager for the given key.
    make_backend:
        Factory producing a fresh ``Backend`` to copy the cached one into.
    make_automl_regressor:
        Factory building the ``AutoMLRegressor`` to fit.

    Returns
    -------
    AutoMLRegressor
        The cached, fitted model with its backend copied to a new location.

    Raises
    ------
    ValueError
        If the fitted model does not consist of exactly one dummy regressor.
    """
    key = "case_regressor_fitted_only_dummy"

    # This locks the cache for this item while we check, required for pytest-xdist
    with make_cache(key) as cache:
        if "model" not in cache:
            model = make_automl_regressor(
                temporary_directory=cache.path("backend"),
                delete_tmp_folder_after_terminate=False,
                include={"regressor": ["k_nearest_neighbors"]},  # Just a meh model
                get_trials_callback=stop_at_first,
            )

            # Pure noise: features and targets are drawn independently
            rand = np.random.RandomState(2)
            _X = rand.random((100, 50))
            _y = rand.random((100,))

            # Only the training split is needed to fit the model
            X, _, y, _ = sklearn.model_selection.train_test_split(
                _X, _y, random_state=1  # Required to ensure dummy is best
            )
            model.fit(X, y, dataset_name="random")

            # We now validate that indeed, the only model is the Dummy.
            # `or` (not `and`): fail if there is not exactly one member OR if
            # that single member is not the dummy. The length check comes
            # first so an empty list never reaches `members[0]`.
            members = list(model.models_.values())
            if len(members) != 1 or not isinstance(members[0], MyDummyRegressor):
                raise ValueError(f"Should only have one model, dummy\n{members}")

            cache.save(model, "model")

        # Always hand out the cached copy, with a backend of its own
        model = cache.load("model")
        model._backend = copy_backend(old=model._backend, new=make_backend())

    return model
0 commit comments