IST-DASLab · xxrjun · Sep 19, 2025
diff --git a/benchmarks/bench_mxfp4.py b/benchmarks/bench_mxfp4.py
@@ -14,8 +14,7 @@
 # limitations under the License.
 #
 
-import sys
-import numpy as np
+import os
 import torch
 import triton
 from scipy.linalg import hadamard
@@ -160,10 +159,11 @@ def benchmark(batch_size, provider, N, K, had_size):
     for K, N in layers:
         for had_size in [32,64,128]:
             print(f"{model}, N={N} K={K}, HAD={had_size}, BF16 vs MXFP4 GEMMs TFLOP/s:")
+            os.makedirs(f"benchmarks_output/bench_mxfp4_res_n{N}_k{K}", exist_ok=True)
             benchmark.run(
                 print_data=True,
                 show_plots=True,
-                save_path=f"bench_mxfp4_res_n{N}_k{K}",
+                save_path=f"benchmarks_output/bench_mxfp4_res_n{N}_k{K}",
                 N=N,
                 K=K,
                 had_size=had_size

diff --git a/benchmarks/bench_mxfp4_sm100.py b/benchmarks/bench_mxfp4_sm100.py
@@ -14,8 +14,7 @@
 # limitations under the License.
 #
 
-import sys
-import numpy as np
+import os
 import torch
 import triton
 from scipy.linalg import hadamard
@@ -160,10 +159,11 @@ def benchmark(batch_size, provider, N, K, had_size):
     for K, N in layers:
         for had_size in [32,64,128]:
             print(f"{model}, N={N} K={K}, HAD={had_size}, BF16 vs MXFP4 GEMMs TFLOP/s:")
+            os.makedirs(f"benchmarks_output/bench_mxfp4_res_n{N}_k{K}_sm100", exist_ok=True)
             benchmark.run(
                 print_data=True,
                 show_plots=True,
-                save_path=f"bench_mxfp4_res_n{N}_k{K}",
+                save_path=f"benchmarks_output/bench_mxfp4_res_n{N}_k{K}_sm100",
                 N=N,
                 K=K,
                 had_size=had_size

diff --git a/benchmarks/bench_nvfp4.py b/benchmarks/bench_nvfp4.py
@@ -14,8 +14,7 @@
 # limitations under the License.
 #
 
-import sys
-import numpy as np
+import os
 import torch
 import triton
 from scipy.linalg import hadamard
@@ -150,10 +149,11 @@ def benchmark(batch_size, provider, N, K, had_size):
     for K, N in layers:
         for had_size in [16,32,64,128]:
             print(f"{model}, N={N} K={K}, HAD={had_size}, BF16 vs NVFP4 GEMMs TFLOP/s:")
+            os.makedirs(f"benchmarks_output/bench_nvfp4_res_n{N}_k{K}", exist_ok=True)
             benchmark.run(
                 print_data=True,
                 show_plots=True,
-                save_path=f"bench_nvfp4_res_n{N}_k{K}",
+                save_path=f"benchmarks_output/bench_nvfp4_res_n{N}_k{K}",
                 N=N,
                 K=K,
                 had_size=had_size

diff --git a/benchmarks/bench_nvfp4_sm100.py b/benchmarks/bench_nvfp4_sm100.py
@@ -14,8 +14,7 @@
 # limitations under the License.
 #
 
-import sys
-import numpy as np
+import os
 import torch
 import triton
 from scipy.linalg import hadamard
@@ -160,10 +159,11 @@ def benchmark(batch_size, provider, N, K, had_size):
     for K, N in layers:
         for had_size in [16,32,64,128]:
             print(f"{model}, N={N} K={K}, HAD={had_size}, BF16 vs NVFP4 GEMMs TFLOP/s:")
+            os.makedirs(f"benchmarks_output/bench_nvfp4_res_n{N}_k{K}_sm100", exist_ok=True)
             benchmark.run(
                 print_data=True,
                 show_plots=True,
-                save_path=f"bench_nvfp4_res_n{N}_k{K}",
+                save_path=f"benchmarks_output/bench_nvfp4_res_n{N}_k{K}_sm100",
                 N=N,
                 K=K,
                 had_size=had_size