Skip to content

Commit e191b83

Browse files
Revert "Add wrappers for synchronous GPUDirect Storage APIs (pytorch#130633)"
This reverts commit 709ddf7. Reverted pytorch#130633 on behalf of https://github.com/clee2000 because it is still failing internally (D60265673); see the comment on pytorch#130633.
1 parent e4db5dc commit e191b83

20 files changed: 1 addition (+1) and 391 deletions (-391).

BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,6 @@ cc_library(
413413
"@cuda//:nvrtc",
414414
"@cudnn",
415415
"@cudnn_frontend",
416-
"@cuda//:cufile",
417416
],
418417
alwayslink = True,
419418
)

CMakeLists.txt

-9
Original file line numberDiff line numberDiff line change
@@ -251,15 +251,6 @@ cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
251251
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
252252
"USE_CUDNN" OFF)
253253
cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
254-
# Binary builds will fail for cufile due to https://github.com/pytorch/builder/issues/1924
255-
# Using TH_BINARY_BUILD to check whether is binary build.
256-
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
257-
if(DEFINED ENV{TH_BINARY_BUILD})
258-
cmake_dependent_option(USE_CUFILE "Use cuFile" ON
259-
"USE_CUDA AND NOT $ENV{TH_BINARY_BUILD} AND NOT WIN32" OFF)
260-
else()
261-
cmake_dependent_option(USE_CUFILE "Use cuFile" ON "USE_CUDA AND NOT WIN32" OFF)
262-
endif()
263254
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
264255
option(USE_KINETO "Use Kineto profiling library" ON)
265256
option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)

build_variables.bzl

-1
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,6 @@ libtorch_python_cuda_core_sources = [
773773
"torch/csrc/cuda/shared/cudart.cpp",
774774
"torch/csrc/cuda/shared/nvtx.cpp",
775775
"torch/csrc/cuda/utils.cpp",
776-
"torch/csrc/cuda/GdsFile.cpp",
777776
]
778777

779778
libtorch_python_cuda_sources = libtorch_python_cuda_core_sources + [

caffe2/CMakeLists.txt

-4
Original file line numberDiff line numberDiff line change
@@ -928,10 +928,6 @@ elseif(USE_CUDA)
928928
torch_compile_options(torch_cuda) # see cmake/public/utils.cmake
929929
target_compile_definitions(torch_cuda PRIVATE USE_CUDA)
930930

931-
if(USE_CUFILE)
932-
target_link_libraries(torch_cuda PRIVATE torch::cufile)
933-
target_compile_definitions(torch_cuda PRIVATE USE_CUFILE)
934-
endif()
935931
if(USE_CUSPARSELT)
936932
target_link_libraries(torch_cuda PRIVATE torch::cusparselt)
937933
target_compile_definitions(torch_cuda PRIVATE USE_CUSPARSELT)

cmake/Dependencies.cmake

+1-5
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ if(USE_CUDA)
3939
set(CAFFE2_USE_CUDA ${USE_CUDA})
4040
set(CAFFE2_USE_CUDNN ${USE_CUDNN})
4141
set(CAFFE2_USE_CUSPARSELT ${USE_CUSPARSELT})
42-
set(CAFFE2_USE_CUFILE ${USE_CUFILE})
4342
set(CAFFE2_USE_NVRTC ${USE_NVRTC})
4443
include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
4544
if(CAFFE2_USE_CUDA)
@@ -61,9 +60,6 @@ if(USE_CUDA)
6160
else()
6261
caffe2_update_option(USE_CUSPARSELT OFF)
6362
endif()
64-
if(CAFFE2_USE_CUFILE)
65-
list(APPEND Caffe2_CUDA_DEPENDENCY_LIBS torch::cufile)
66-
endif()
6763
find_program(SCCACHE_EXECUTABLE sccache)
6864
if(SCCACHE_EXECUTABLE)
6965
# Using RSP/--options-file renders output noncacheable by sccache
@@ -83,7 +79,6 @@ if(USE_CUDA)
8379
set(CAFFE2_USE_CUDA OFF)
8480
set(CAFFE2_USE_CUDNN OFF)
8581
set(CAFFE2_USE_CUSPARSELT OFF)
86-
set(CAFFE2_USE_CUFILE OFF)
8782
set(CAFFE2_USE_NVRTC OFF)
8883
endif()
8984
endif()
@@ -1040,6 +1035,7 @@ if(USE_ROCM)
10401035
caffe2_update_option(USE_SYSTEM_NCCL ON)
10411036
endif()
10421037

1038+
10431039
list(APPEND HIP_CXX_FLAGS -fPIC)
10441040
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_AMD__=1)
10451041
list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1)

cmake/Modules/FindCUDAToolkit.cmake

-8
Original file line numberDiff line numberDiff line change
@@ -978,14 +978,6 @@ if(CUDAToolkit_FOUND)
978978
_CUDAToolkit_find_and_add_import_lib(cublas_static DEPS culibos)
979979
endif()
980980

981-
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.4)
982-
_CUDAToolkit_find_and_add_import_lib(cuFile ALT cufile DEPS culibos)
983-
_CUDAToolkit_find_and_add_import_lib(cuFile_static ALT cufile_static DEPS culibos)
984-
985-
_CUDAToolkit_find_and_add_import_lib(cuFile_rdma ALT cufile_rdma DEPS cuFile culibos)
986-
_CUDAToolkit_find_and_add_import_lib(cuFile_rdma_static ALT cufile_rdma_static DEPS cuFile_static culibos)
987-
endif()
988-
989981
# cuFFTW depends on cuFFT
990982
_CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft)
991983
_CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static)

cmake/Summary.cmake

-4
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ function(caffe2_print_configuration_summary)
7474
message(STATUS " CUDA static link : ${CAFFE2_STATIC_LINK_CUDA}")
7575
message(STATUS " USE_CUDNN : ${USE_CUDNN}")
7676
message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}")
77-
message(STATUS " USE_CUFILE : ${USE_CUFILE}")
7877
message(STATUS " CUDA version : ${CUDA_VERSION}")
7978
message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
8079
message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
@@ -84,9 +83,6 @@ function(caffe2_print_configuration_summary)
8483
if(${USE_CUSPARSELT})
8584
message(STATUS " cuSPARSELt version : ${CUSPARSELT_VERSION}")
8685
endif()
87-
if(${USE_CUFILE})
88-
message(STATUS " cufile library : ${CUDA_cuFile_LIBRARY}")
89-
endif()
9086
message(STATUS " CUDA root directory : ${CUDA_TOOLKIT_ROOT_DIR}")
9187
message(STATUS " CUDA library : ${CUDA_cuda_driver_LIBRARY}")
9288
message(STATUS " cudart library : ${CUDA_cudart_LIBRARY}")

cmake/public/cuda.cmake

-16
Original file line numberDiff line numberDiff line change
@@ -244,22 +244,6 @@ else()
244244
message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
245245
endif()
246246

247-
# cufile
248-
if(CAFFE2_USE_CUFILE)
249-
add_library(torch::cufile INTERFACE IMPORTED)
250-
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
251-
set_property(
252-
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
253-
CUDA::cuFile_static)
254-
else()
255-
set_property(
256-
TARGET torch::cufile PROPERTY INTERFACE_LINK_LIBRARIES
257-
CUDA::cuFile)
258-
endif()
259-
else()
260-
message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
261-
endif()
262-
263247
# curand
264248
add_library(caffe2::curand INTERFACE IMPORTED)
265249
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)

docs/source/cuda.rst

-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ See the :doc:`documentation <cuda._sanitizer>` for information on how to use it.
181181
.. for tracking purposes
182182
.. py:module:: torch.cuda.comm
183183
.. py:module:: torch.cuda.error
184-
.. py:module:: torch.cuda.gds
185184
.. py:module:: torch.cuda.graphs
186185
.. py:module:: torch.cuda.jiterator
187186
.. py:module:: torch.cuda.memory

setup.py

-3
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@
3838
# USE_CUSPARSELT=0
3939
# disables the cuSPARSELt build
4040
#
41-
# USE_CUFILE=0
42-
# disables the cuFile build
43-
#
4441
# USE_FBGEMM=0
4542
# disables the FBGEMM build
4643
#

test/test_cuda.py

-46
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
from itertools import product
1818
from random import randint
1919

20-
import psutil
21-
2220
import torch
2321
import torch.cuda
2422
from torch import inf, nan
@@ -64,7 +62,6 @@
6462
skipIfRocm,
6563
slowTest,
6664
subtest,
67-
TemporaryFileName,
6865
TEST_CUDA,
6966
TEST_CUDA_GRAPH,
7067
TEST_NUMPY,
@@ -4025,15 +4022,6 @@ def test_device_count_not_cached_pre_init(self):
40254022
x = torch.cuda.device_count()
40264023
self.assertEqual(f"{x}, 1", r)
40274024

4028-
def test_gds_fails_in_ci(self):
4029-
if IS_WINDOWS or TEST_WITH_ROCM:
4030-
error_msg = "is not supported on this platform"
4031-
else:
4032-
error_msg = "cuFileHandleRegister failed"
4033-
with TemporaryFileName() as f:
4034-
with self.assertRaisesRegex(RuntimeError, error_msg):
4035-
file = torch.cuda.gds._GdsFile(f, os.O_CREAT | os.O_RDWR)
4036-
40374025

40384026
@torch.testing._internal.common_utils.markDynamoStrictTest
40394027
class TestCudaMallocAsync(TestCase):
@@ -5181,40 +5169,6 @@ def test_graph_grad_scaling(self, device, dtype, optim_info, foreach, fused):
51815169
self.assertEqual(scaler._growth_tracker, growth_tracker)
51825170

51835171

5184-
class TestGDS(TestCase):
5185-
def _get_tmp_dir_fs_type(self):
5186-
my_path = os.path.realpath("/tmp")
5187-
root_type = ""
5188-
for part in psutil.disk_partitions():
5189-
if part.mountpoint == "/":
5190-
root_type = part.fstype
5191-
continue
5192-
if part.mountpoint == my_path:
5193-
return part.fstype
5194-
return root_type
5195-
5196-
@unittest.skipIf(IS_WINDOWS or TEST_WITH_ROCM, "Not supported on Windows or ROCm")
5197-
def test_gds_read_write_tensors(self):
5198-
if self._get_tmp_dir_fs_type() not in ("ext4", "xfs"):
5199-
self.skipTest("GPUDirect Storage requires ext4/xfs for local filesystem")
5200-
src1 = torch.randn(1024, device="cuda")
5201-
src2 = torch.randn(2, 1024, device="cuda")
5202-
torch.cuda.gds._gds_register_buffer(src1.untyped_storage())
5203-
torch.cuda.gds._gds_register_buffer(src2.untyped_storage())
5204-
dest1 = torch.empty(1024, device="cuda")
5205-
dest2 = torch.empty(2, 1024, device="cuda")
5206-
with TemporaryFileName() as f:
5207-
file = torch.cuda.gds._GdsFile(f, os.O_CREAT | os.O_RDWR)
5208-
file.save_storage(src1.untyped_storage(), offset=0)
5209-
file.save_storage(src2.untyped_storage(), offset=src1.nbytes)
5210-
file.load_storage(dest1.untyped_storage(), offset=0)
5211-
file.load_storage(dest2.untyped_storage(), offset=src1.nbytes)
5212-
self.assertEqual(src1, dest1)
5213-
self.assertEqual(src2, dest2)
5214-
torch.cuda.gds._gds_deregister_buffer(src1.untyped_storage())
5215-
torch.cuda.gds._gds_deregister_buffer(src2.untyped_storage())
5216-
5217-
52185172
instantiate_parametrized_tests(TestCuda)
52195173
instantiate_parametrized_tests(TestCudaMallocAsync)
52205174
instantiate_device_type_tests(TestCudaOptims, globals())

third_party/cuda.BUILD

-6
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,6 @@ cc_library(
6060
visibility = ["//visibility:public"],
6161
)
6262

63-
cc_library(
64-
name = "cufile",
65-
srcs = ["targets/x86_64-linux/lib/libcufile.so"],
66-
visibility = ["//visibility:public"],
67-
)
68-
6963
cc_library(
7064
name = "nvrtc",
7165
srcs = [

torch/CMakeLists.txt

-4
Original file line numberDiff line numberDiff line change
@@ -312,10 +312,6 @@ if(USE_NUMPY)
312312
target_compile_definitions(torch_python PRIVATE USE_NUMPY)
313313
endif()
314314

315-
if(USE_CUFILE AND NOT USE_ROCM)
316-
target_compile_definitions(torch_python PRIVATE USE_CUFILE)
317-
endif()
318-
319315
if(HAVE_SOVERSION)
320316
set_target_properties(torch_python PROPERTIES
321317
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})

torch/_C/__init__.pyi.in

-8
Original file line numberDiff line numberDiff line change
@@ -1975,14 +1975,6 @@ def _can_use_cudnn_attention(params: _SDPAParams, debug: _bool) -> _bool: ...
19751975
def _can_use_flash_attention(params: _SDPAParams, debug: _bool) -> _bool: ...
19761976
def _can_use_mem_efficient_attention(params: _SDPAParams, debug: _bool) -> _bool: ...
19771977

1978-
# Defined in torch/csrc/cuda/GdsFile.cpp
1979-
def _gds_register_buffer(t: Storage) -> None: ...
1980-
def _gds_deregister_buffer(t: Storage) -> None: ...
1981-
def _gds_register_handle(fd: _int) -> _int: ...
1982-
def _gds_deregister_handle(handle: _int) -> None: ...
1983-
def _gds_load_storage(handle: _int, s: Storage, offset: _int) -> None: ...
1984-
def _gds_save_storage(handle: _int, s: Storage, offset: _int) -> None: ...
1985-
19861978
# Defined in torch/csrc/cuda/python_comm.cpp
19871979
def _broadcast(tensor: Tensor, devices: List[_int]) -> List[Tensor]: ...
19881980
def _broadcast_out(tensor: Tensor, out_tensors: List[Tensor]) -> List[Tensor]: ...

torch/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,6 @@ def _load_global_deps() -> None:
306306
"cuda_runtime": "libcudart.so.*[0-9]",
307307
"cuda_cupti": "libcupti.so.*[0-9]",
308308
"cufft": "libcufft.so.*[0-9]",
309-
"cufile": "libcufile.so.*[0-9]",
310309
"curand": "libcurand.so.*[0-9]",
311310
"nvjitlink": "libnvJitLink.so.*[0-9]",
312311
"cusparse": "libcusparse.so.*[0-9]",

0 commit comments

Comments
 (0)