We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 460c188, commit 93548eb (Copy full SHA for 93548eb)
csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
@@ -38,13 +38,7 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
38
if (cuda_device_capability >= 90) {
39
return CUDA_VERSION >= 12000;
40
} else if (cuda_device_capability >= 89) {
41
- // CUTLASS Kernels have not been tuned for Ada Lovelace systems
42
- // and are slower than torch.mm. Return false unconditionally in this case.
43
- return false;
44
-
45
- // Once the CUTLASS kernels have been optimized for Lovelace systems,
46
- // use the following check:
47
- // return CUDA_VERSION >= 12040;
+ return CUDA_VERSION >= 12040;
48
}
49
#endif
50
0 commit comments