Commit f91fc56

CUDA: fix Gemma 2 numerical issues for FA (ggml-org#9166)

1 parent e11bd85 · commit f91fc56

File tree

1 file changed: +1 -1 lines changed

src/llama.cpp

Lines changed: 1 addition & 1 deletion
@@ -8877,7 +8877,7 @@ static struct ggml_tensor * llm_build_kqv(
         cur = ggml_flash_attn_ext(ctx, q, k, v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
                                   hparams.attn_soft_cap ? hparams.f_attn_logit_softcapping : 0.0f);

-        if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX) {
+        if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_GEMMA2) {
             ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
         }
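The change is a one-liner: LLM_ARCH_GEMMA2 joins the architectures whose ggml_flash_attn_ext node is forced to F32 precision, presumably because Gemma 2's attention-logit softcapping (the hparams.attn_soft_cap branch above) is sensitive to the default F16 precision of the CUDA FA kernels. Below is a minimal, self-contained sketch of the same pattern outside of llm_build_kqv, using the public ggml API; the tensor shapes, the softcap value, and the NULL mask are illustrative assumptions, not values taken from this commit.

// Sketch: build a flash-attention node with ggml and force it to F32 precision,
// the same call the patch applies for Gemma 2. Shapes, the softcap value, and the
// NULL mask are assumptions for illustration only.
#include "ggml.h"

#include <math.h>
#include <stdbool.h>
#include <stddef.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,   // small scratch arena for this demo graph
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Illustrative shapes: head_dim x n_tokens x n_head, batch of 1
    const int64_t head_dim = 128, n_tokens = 8, n_head = 4;

    struct ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_tokens, n_head, 1);
    struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, head_dim, n_tokens, n_head, 1);
    struct ggml_tensor * v = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, head_dim, n_tokens, n_head, 1);

    const float kq_scale       = 1.0f/sqrtf((float) head_dim);
    const float max_alibi_bias = 0.0f;   // no ALiBi, so a NULL mask is acceptable here
    const float logit_softcap  = 50.0f;  // placeholder; llama.cpp passes hparams.f_attn_logit_softcapping

    struct ggml_tensor * cur = ggml_flash_attn_ext(ctx, q, k, v, /*mask =*/ NULL,
                                                   kq_scale, max_alibi_bias, logit_softcap);

    // The fix in this commit: run the FA node at F32 precision instead of the F16 default,
    // which llama.cpp now does for Gemma 2 as well as Phi-2/Phi-3/GPT-NeoX.
    ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);

    ggml_free(ctx);
    return 0;
}

The sketch only constructs the graph node and sets the precision flag; actually evaluating it would require building a compute graph on a backend (CUDA, in the case this commit targets).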

0 commit comments
