File tree 1 file changed +2
-2
lines changed
1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -103,7 +103,7 @@ class GRPOConfig(TrainingArguments):
103
103
[`~transformers.TrainingArguments`].
104
104
beta (`float`, *optional*, defaults to `0.04`):
105
105
KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
106
- speed.
106
+ speed, but may be numerically unstable for long training runs.
107
107
num_iterations (`int`, *optional*, defaults to `1`):
108
108
Number of iterations per batch (denoted as μ in the algorithm).
109
109
epsilon (`float`, *optional*, defaults to `0.2`):
@@ -264,7 +264,7 @@ class GRPOConfig(TrainingArguments):
264
264
default = 0.04 ,
265
265
metadata = {
266
266
"help" : "KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving "
267
- "training speed."
267
+ "training speed, but may be numerically unstable for long training runs."
268
268
},
269
269
)
270
270
num_iterations : int = field (
You can’t perform that action at this time.
0 commit comments