Commit f7cd64c
[not for land] testing out float8 128_1_128_128 blockwise scaling
Summary: Test drive of pytorch/ao#2386, not for land

Test Plan:

```bash
with-proxy CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh --model.converters float8 --model.print_after_conversion
```

Reviewers:

Subscribers:

Tasks:

Tags:
1 parent 1ab4353

1 file changed

torchtitan/components/quantization/float8.py

Lines changed: 13 additions & 0 deletions
```diff
@@ -101,6 +101,19 @@ def convert(self, model: nn.Module):
         if not self.enabled:
             return

+        from torchao.quantization import quantize_
+        from torchao.prototype.deep_gemm_float8_training.linear import (
+            DeepGemmFloat8LinearConfig,
+        )
+
+        quantize_(
+            model,
+            config=DeepGemmFloat8LinearConfig(),
+            filter_fn=lambda mod, fqn: isinstance(mod, torch.nn.Linear) and fqn != "output",
+        )
+        logger.info("enabled DeepGemm dense training")
+        return
+
         from torchao.float8 import convert_to_float8_training

         # Mutates the model inplace replacing instances of nn.Linear with Float8Linear
```
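For context, the snippet below is a minimal, self-contained sketch of the same conversion pattern applied to a toy model rather than torchtitan's llama3. It assumes a torchao build that includes the `deep_gemm_float8_training` prototype from pytorch/ao#2386 (not in a released torchao), and the `ToyModel` module names, including the `output` layer the filter skips, are illustrative.

```python
import torch
import torch.nn as nn

from torchao.quantization import quantize_
from torchao.prototype.deep_gemm_float8_training.linear import (
    DeepGemmFloat8LinearConfig,
)


class ToyModel(nn.Module):
    """Stand-in for the real model; layer names are illustrative."""

    def __init__(self) -> None:
        super().__init__()
        self.w1 = nn.Linear(4096, 4096, bias=False)
        self.w2 = nn.Linear(4096, 4096, bias=False)
        # Named "output" so the filter below skips it, mirroring the
        # fqn != "output" check in the commit.
        self.output = nn.Linear(4096, 4096, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.output(self.w2(self.w1(x)))


# DeepGemm float8 kernels target recent (Hopper-class) GPUs.
model = ToyModel().cuda().bfloat16()

# Swap every nn.Linear except the module whose fully qualified name is
# "output" for the DeepGemm blockwise-scaled float8 training linear.
quantize_(
    model,
    config=DeepGemmFloat8LinearConfig(),
    filter_fn=lambda mod, fqn: isinstance(mod, torch.nn.Linear) and fqn != "output",
)

# Printing the model shows which linears were converted, which is what
# --model.print_after_conversion surfaces in the torchtitan run above.
print(model)
```

Skipping the module named `output` matches torchtitan's usual float8 recipes, which tend to leave the final output projection in higher precision.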
