@@ -125,7 +125,7 @@ def test_training_peft(self):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the peft params have changed and the base model params have not changed
+            # Check that the peft params have changed and the base model params have not changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 if n in base_param_names:  # We expect the base model params to be the same
@@ -168,7 +168,7 @@ def test_training_different_reward_model(self):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -203,7 +203,7 @@ def reward_func(completions, **kwargs):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -239,7 +239,7 @@ def reward_func(completions, **kwargs):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -278,7 +278,7 @@ def reward_func2(completions, **kwargs):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -356,7 +356,7 @@ def reward_func(completions, **kwargs):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -395,7 +395,7 @@ def reward_func(completions, some_values, **kwargs):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check the params have changed
+            # Check that the params have changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
@@ -416,9 +416,10 @@ def test_training_vllm(self):
                 report_to="none",
                 use_vllm=True,
                 vllm_device="cuda:0",  # will raise a warning, but allows this test to work with only one GPU
+                vllm_gpu_memory_utilization=0.5,  # reduce, since we use the same device for training and vllm
             )
             trainer = GRPOTrainer(
-                model="trl-internal-testing/small-Qwen2ForCausalLM-2.5",
+                model="Qwen/Qwen2.5-0.5B-Instruct",  # tiny is too small for vLLM
                 reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
                 args=training_args,
                 train_dataset=dataset,
@@ -504,6 +505,8 @@ def test_training_with_sync_ref_model(self):
     @require_peft
     def test_training_vllm_and_peft(self):
         """Test that training works with vLLM for generation."""
+        model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # tiny model is too small for vLLM
+        base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -513,14 +516,22 @@ def test_training_vllm_and_peft(self):
                 per_device_train_batch_size=3,  # reduce the batch size to reduce memory usage
                 num_generations=3,  # reduce the number of generations to reduce memory usage
                 max_completion_length=32,  # reduce the completion length to reduce memory usage
-                use_vllm=True,
                 report_to="none",
+                use_vllm=True,
+                vllm_device="cuda:0",  # will raise a warning, but allows this test to work with only one GPU
+                vllm_gpu_memory_utilization=0.5,  # reduce, since we use the same device for training and vllm
+            )
+            lora_config = LoraConfig(
+                target_modules="all-linear",
+                # test with non-default modules, as it adds extra keys in state_dict that we need to handle
+                modules_to_save=["embed_tokens", "lm_head"],
             )
             trainer = GRPOTrainer(
-                model="trl-internal-testing/small-Qwen2ForCausalLM-2.5",
+                model=model,
                 reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
                 args=training_args,
                 train_dataset=dataset,
+                peft_config=lora_config,
             )

             previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()}
@@ -529,7 +540,11 @@ def test_training_vllm_and_peft(self):

             self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

-            # Check that the params have changed
+            # Check that the peft params have changed and the base model params have not changed
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
-                self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
+                if n in base_param_names:  # We expect the base model params to be the same
+                    self.assertTrue(torch.allclose(param, new_param), f"Parameter {n} has changed.")
+                elif "base_layer" not in n and "original_module" not in n:
+                    # We expect the peft params to be different (except for the base layer)
+                    self.assertFalse(torch.allclose(param, new_param), f"Parameter {n} has not changed.")
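For reference, the pieces this diff adds compose into the following standalone setup. This is a minimal sketch, assuming the TRL and PEFT APIs exactly as they appear in the hunks above (GRPOConfig, GRPOTrainer, LoraConfig); the output_dir value is illustrative, and dataset/model names are the ones used by the test.

# Sketch of the vLLM + LoRA training setup exercised by the new test.
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM
from trl import GRPOConfig, GRPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
# PEFT prefixes frozen base weights with "base_model.model.", which is how the
# test distinguishes untouched base params from trained adapter params.
base_param_names = [f"base_model.model.{n}" for n, _ in model.named_parameters()]

dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")

training_args = GRPOConfig(
    output_dir="grpo-vllm-peft",        # illustrative path
    per_device_train_batch_size=3,      # small values keep memory usage low
    num_generations=3,
    max_completion_length=32,
    report_to="none",
    use_vllm=True,
    vllm_device="cuda:0",               # shares the training GPU (warns, but works on one GPU)
    vllm_gpu_memory_utilization=0.5,    # leave headroom for the training process
)
lora_config = LoraConfig(
    target_modules="all-linear",
    # non-default modules add extra state_dict keys that the trainer must handle
    modules_to_save=["embed_tokens", "lm_head"],
)
trainer = GRPOTrainer(
    model=model,
    reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
    args=training_args,
    train_dataset=dataset,
    peft_config=lora_config,
)
trainer.train()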