
Commit

Accuracy fix for llama3.1-70B in eager/torch.compile mode (#1746)
Co-authored-by: Vivek Goel <[email protected]>
ckvermaAI and vivekgoe authored Feb 7, 2025
1 parent 3d7b2fa · commit a0d14d2
Showing 1 changed file with 2 additions and 1 deletion.
optimum/habana/transformers/models/llama/modeling_llama.py (3 changes: 2 additions & 1 deletion)
@@ -136,7 +136,8 @@ def __init__(
 
     def _set_cos_sin_cache(self, seq_len, device, dtype):
         self.max_seq_len_cached = seq_len
-        t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
+        # Use torch.int32 to avoid loss due to low precision with BF16 (refer to SW-215204)
+        t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int32)
 
         freqs = torch.outer(t, self.inv_freq)
         # Different from paper, but it uses a different permutation in order to obtain the same calculation
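For context, a minimal sketch (not part of the commit) of the precision loss this change works around: bfloat16 carries only about 8 bits of significand, so torch.arange in bfloat16 cannot represent every integer above 256, and neighboring position indices collapse onto the same value for long sequences. The seq_len below is an illustrative value, not taken from the commit.

import torch

# bfloat16 has ~8 bits of significand precision, so integers above 256
# cannot all be represented exactly and position indices start to collide.
seq_len = 8192  # illustrative value, not from the commit

t_bf16 = torch.arange(seq_len, dtype=torch.bfloat16)
t_int32 = torch.arange(seq_len, dtype=torch.int32)

# Positions whose bfloat16 value no longer equals the exact integer index.
wrong = (t_bf16.float() != t_int32.float()).sum().item()
print(f"{wrong} of {seq_len} positions are inexact in bfloat16")

# The number of distinct representable positions collapses well below
# seq_len, so different positions share identical rotary embeddings.
print(f"distinct bf16 positions: {t_bf16.float().unique().numel()}")

Computing t in torch.int32 keeps every index exact; standard PyTorch type promotion in torch.outer then yields a floating-point result against self.inv_freq when the cos/sin cache is built.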
