
[Do NOT MERGE] [release/2.5] Enable tf32 testing on test_nn #1859

Open · wants to merge 1 commit into base: release/2.5
4 changes: 2 additions & 2 deletions aten/src/ATen/Context.cpp
@@ -233,7 +233,7 @@ void Context::setBenchmarkLimitCuDNN(int b) {

 bool Context::allowTF32CuBLAS() const {
 #ifdef USE_ROCM
-  const static auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
+  auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
   if (allow_tf32 != true) {
     return false;
   }
@@ -243,7 +243,7 @@ bool Context::allowTF32CuBLAS() const {

 void Context::setAllowTF32CuBLAS(bool b) {
 #ifdef USE_ROCM
-  const static auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
+  auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32);
   if (allow_tf32 != true) {
     LOG(INFO) << "torch.backends.cuda.matmul.allow_tf32 is not supported on ROCm by default. "
               << "Please set environment variable HIPBLASLT_ALLOW_TF32=1 to enable it.";
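On ROCm, both paths above gate TF32 on the HIPBLASLT_ALLOW_TF32 environment variable. Dropping const static means check_env is re-evaluated on every call rather than cached at first use, which is presumably what lets the test helpers further down toggle the variable at runtime. A minimal sketch of the resulting behavior, assuming a ROCm build; this illustration is not part of the patch:

import os
import torch

# With the cache removed, allowTF32CuBLAS() re-reads the env var on each call.
os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
torch.backends.cuda.matmul.allow_tf32 = True
assert torch.backends.cuda.matmul.allow_tf32  # honored while the env var is set

del os.environ["HIPBLASLT_ALLOW_TF32"]
# The next query re-checks the env var and reports False again on ROCm.
assert not torch.backends.cuda.matmul.allow_tf32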
25 changes: 24 additions & 1 deletion torch/testing/_internal/common_cuda.py
@@ -5,7 +5,14 @@
 import functools
 import torch
 import torch.cuda
-from torch.testing._internal.common_utils import LazyVal, TEST_NUMBA, TEST_WITH_ROCM, TEST_CUDA, IS_WINDOWS
+from torch.testing._internal.common_utils import (
+    LazyVal,
+    MI300_ARCH,
+    TEST_NUMBA,
+    TEST_WITH_ROCM,
+    TEST_CUDA,
+    IS_WINDOWS,
+)
 import inspect
 import contextlib
 import os
@@ -118,7 +125,15 @@ def initialize_cuda_context_rng():
 # Test whether hardware TF32 math mode enabled. It is enabled only on:
 # - CUDA >= 11
 # - arch >= Ampere
+# For AMD GPUs, tf32 is supported on mi300.
 def tf32_is_not_fp32():
+    if torch.version.hip:
+        prop = torch.cuda.get_device_properties(torch.cuda.current_device())
+        if prop.gcnArchName.split(":")[0] in MI300_ARCH:
+            return True
+        else:
+            return False
     if not torch.cuda.is_available() or torch.version.cuda is None:
         return False
     if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8:
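With this hunk, tf32_is_not_fp32() answers "can TF32 kernels actually diverge from FP32 math on this device?" — true on MI300 under ROCm, and (per the unchanged branch below the fold) on Ampere or newer with CUDA >= 11. A hedged sketch of how a test might consume it; the shapes and tolerances here are made-up values:

import torch
from torch.testing._internal.common_cuda import tf32_is_not_fp32

# Widen tolerances only on hardware where TF32 can change numerics.
atol = rtol = 1e-2 if tf32_is_not_fp32() else 1e-5

a = torch.randn(64, 64, device="cuda")
b = torch.randn(64, 64, device="cuda")
ref = (a.double() @ b.double()).float()  # FP64 reference, rounded back
torch.testing.assert_close(a @ b, ref, atol=atol, rtol=rtol)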
@@ -141,6 +156,9 @@ def tf32_off():

 @contextlib.contextmanager
 def tf32_on(self, tf32_precision=1e-5):
+    if torch.version.hip:
+        hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
+        os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
     old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32
     old_precision = self.precision
     try:
@@ -149,6 +167,11 @@ def tf32_on(self, tf32_precision=1e-5):
         with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True):
             yield
     finally:
+        if torch.version.hip:
+            if hip_allow_tf32 is not None:
+                os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
+            else:
+                del os.environ["HIPBLASLT_ALLOW_TF32"]
         torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul
         self.precision = old_precision
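Net effect: on ROCm, tf32_on now also exports HIPBLASLT_ALLOW_TF32=1 for the duration of the context and restores (or deletes) the prior value on exit, mirroring how the matmul flag and test precision were already saved and restored. A sketch of the intended usage inside a TestCase; the test body and tolerance are hypothetical:

import torch
from torch.testing._internal.common_cuda import tf32_off, tf32_on
from torch.testing._internal.common_utils import TestCase

class TestMatmulTF32(TestCase):
    def test_mm(self):
        a = torch.randn(32, 32, device="cuda")
        b = torch.randn(32, 32, device="cuda")
        ref = (a.double() @ b.double()).float()
        # On ROCm this block also sets HIPBLASLT_ALLOW_TF32=1, then restores it.
        with tf32_on(self, tf32_precision=0.005):
            self.assertEqual(a @ b, ref)
        with tf32_off():
            self.assertEqual(a @ b, ref)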
