Commit ef449a4

nopepperjhinpan authored and committed
🔍 Update GRPO config documentation for beta parameter stability (huggingface#2992)
1 parent 23c8a52 commit ef449a4

File tree

1 file changed (+2 -2 lines changed)


trl/trainer/grpo_config.py

+2 -2

@@ -103,7 +103,7 @@ class GRPOConfig(TrainingArguments):
             [`~transformers.TrainingArguments`].
         beta (`float`, *optional*, defaults to `0.04`):
             KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
-            speed.
+            speed, but may be numerically unstable for long training runs.
         num_iterations (`int`, *optional*, defaults to `1`):
             Number of iterations per batch (denoted as μ in the algorithm).
         epsilon (`float`, *optional*, defaults to `0.2`):
@@ -264,7 +264,7 @@ class GRPOConfig(TrainingArguments):
         default=0.04,
         metadata={
             "help": "KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving "
-            "training speed."
+            "training speed, but may be numerically unstable for long training runs."
         },
     )
     num_iterations: int = field(
