Build Torch from source (#4554)

* Updated Torch to v1.10.2 (CPU-only) * Removed Torch v1.4.0 which included Torch.jl wrapper * Skipped Torch.jl wrapper * With MKL dependency on MKL-platforms * Using protoc v3.13.0 JLL. * Added protoc as a build dependency to get correct version * Not using ONNX dependency to get past protoc issue * Added micromamba install of pyyaml and typing_extensions - needed for build. * Using XNNPACK JLL dependency * Added CPUInfo and PThreadPool dependencies * Added SLEEF dependency * Turned off some features explicitly to silence some configure warnings * Not using NNPACK, and QNNPACK, and limited PYTORCH_QNNPACK to x86_64. * Disabled use of breakpad on aarch64-linux-gnu * Enabled configure on Windows via patch and disabling breakpad * Disabled use of TensorPipe on linux-musl * Excluded unsupported powerpc64le and i686-windows platforms * Disabled kineto for w64 and freebsd * Disabled breakpad for FreeBSD * Disabled use of MKLDNN on macOS * Added Gloo dependency - to aid linux-musl * Disabled MKLDNN for linux-musl * Disabled FreeBSD as Clang v12 crashes * Disabled MKLDNN for w64-mingw32 * Using MKL, BLIS, or OpenBLAS + LAPACK - preferring MKL or BLIS * Restricted use of LAPACK to OpenBLAS platforms * Set preferred BLAS for armv6l-linux-gnu * Disabled FBGEMM for x86_64-w64-mingw32 * Added MKL_Headers as dependency * Disabled MKL for Windows as CMake cannot find MKL * Optimized git submodule update * Added note about disabling MKLDNN for x86_64-apple-darwin * Fixed a few warnings related to FBGEMM * Fixed windows warning related to TensorPipe * Disabled Metal to silence warning that it is only used on iOS * Silence cmake developer warnings * Disabled linux-musl and Windows * Added additional library product libtorch_cpu * Added SO version to libraries and disabled numpy * Set GLIBCXX_USE_CXX11_ABI - like official libtorch builds. 
* Added platform expansion for C++ string ABIs * Added dep build versions and/or compat * Disabled ARM 32-bit platforms * Fixup for FBGEMM warning on aarch64-apple-darwin * Added dependencies graph for pytorch wrt. xnnpack, pthreadpool and cpuinfo * Added CUDA 10.2 and CUDA 11.3 x86_64-linux-gnu platforms * Using CUDA_full v11.3 to use v11.3.1+1 which includes Thrust library. * Using CUDNN v8.2.4 for build version (similar to ONNXRuntime) * Added patch for cmake to find CUDA * Set CUDACXX to make cmake find CUDA * Added CUDA libraries manually - and enabled CUDNN * Added double-triple configure hack to make CUDA configure - To get past TRY_RUN for CUDA * Added CUDA headers to CMAKE_INCLUDE_PATH * Additional fixes for CUDA - and CUB * Set TMPDIR for nvcc * Added additional CUDA libraries
JuliaPackaging · Sep 13, 2022 · 74872fc · 74872fc
1 parent d6a5224
commit 74872fc
Show file tree

Hide file tree

Showing 4 changed files with 341 additions and 33 deletions.
diff --git a/T/Torch/build_tarballs.jl b/T/Torch/build_tarballs.jl
@@ -3,60 +3,266 @@
 using BinaryBuilder, Pkg
 
 name = "Torch"
-version = v"1.4.0"
+version = v"1.10.2"
 
 # Collection of sources required to complete build
 sources = [
-    GitSource("https://github.com/dhairyagandhi96/Torch.jl.git", "85bd08d39e7fba29ec4a643f60dd006ed8be8ede"),
-    ArchiveSource("https://download.pytorch.org/libtorch/cu101/libtorch-cxx11-abi-shared-with-deps-1.4.0.zip", "f214bfde532877aa5d4e0803e51a28fa8edd97b6a44b6615f75a70352b6b542e"),
-    ArchiveSource("https://github.com/JuliaGPU/CUDABuilder/releases/download/v0.3.0/CUDNN+CUDA10.1.v7.6.5.x86_64-linux-gnu.tar.gz", "79de5b5085a33bc144b87028e998a1d295a15c3424d6d45b25defe500f616974", unpack_target = "cudnn"),
+    GitSource("https://github.com/pytorch/pytorch.git", "71f889c7d265b9636b93ede9d651c0a9c4bee191"), # PyTorch sources pinned to one commit (presumably the v1.10.2 tag - confirm)
+    FileSource("https://micromamba.snakepit.net/api/micromamba/linux-64/0.21.1", "c907423887b43bec4e8b24f17471262c8087b7095683f41dcef4a4e24e9a3bbd"; filename = "micromamba.tar.bz2"), # micromamba archive; the script unpacks it to install Python, pyyaml and typing_extensions
+    ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v10.2.89%2B5/CUDA_full.v10.2.89.x86_64-linux-gnu.tar.gz", "60e6f614db3b66d955b7e6aa02406765e874ff475c69e2b4a04eb95ba65e4f3b"; unpack_target = "CUDA_full.v10.2"), # CUDA 10.2 toolkit; the script looks it up as srcdir/CUDA_full.v10.2/cuda
+    ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v11.3.1%2B1/CUDA_full.v11.3.1.x86_64-linux-gnu.tar.gz", "9ae00d36d39b04e8e99ace63641254c93a931dcf4ac24c8eddcdfd4625ab57d6"; unpack_target = "CUDA_full.v11.3"), # CUDA 11.3 toolkit; the script looks it up as srcdir/CUDA_full.v11.3/cuda
+    DirectorySource("./bundled"), # bundled files; the script reads patches as ../patches from the pytorch checkout
 ]
 
 # Bash recipe for building across all platforms
 script = raw"""
 cd $WORKSPACE/srcdir
 
-mv cudnn $prefix
-mv libtorch/share/* $prefix/share/
-mv libtorch/lib/* $prefix/lib/
-rm -r libtorch/lib
-rm -r libtorch/share
-mv libtorch/* $prefix
-rm -r libtorch
-
-mkdir -p /usr/local/cuda/lib64
-cd /usr/local/cuda/lib64
-ln -s ${prefix}/cuda/lib64/libcudart.so libcudart.so
-ln -s ${prefix}/cuda/lib64/libnvToolsExt.so libnvToolsExt.so
-
-cd $WORKSPACE/srcdir/Torch.jl/build
-mkdir build && cd build
-cmake -DCMAKE_PREFIX_PATH=$prefix -DTorch_DIR=$prefix/share/cmake/Torch -DCUDA_TOOLKIT_ROOT_DIR=$prefix/cuda ..
-cmake --build .
-
-mkdir -p "${libdir}"
-cp -r $WORKSPACE/srcdir/Torch.jl/build/build/*.${dlext} "${libdir}"
-rm -rf $prefix/cuda
-install_license ${WORKSPACE}/srcdir/Torch.jl/LICENSE
+mkdir micromamba # Bootstrap a throwaway Python environment with micromamba for the build
+cd micromamba
+tar xfj ../micromamba.tar.bz2 # archive name comes from the FileSource entry in sources
+export PATH=$PATH:$WORKSPACE/srcdir/micromamba/bin
+./bin/micromamba shell init -s bash -p ~/micromamba # writes shell hooks that the next line loads from ~/.bashrc
+source ~/.bashrc
+micromamba activate
+micromamba install -y python=3.9 pyyaml typing_extensions -c conda-forge # pyyaml and typing_extensions are needed for the build
+
+cd $WORKSPACE/srcdir/pytorch
+
+atomic_patch -p1 ../patches/pytorch-aten-qnnpack-cmake-windows.patch # lets the QNNPACK CMakeLists accept CMAKE_SYSTEM_NAME=Windows. NOTE(review): the bundled pytorch-cmake-find-cuda.patch is not applied anywhere in this script - confirm that is intended
+
+cmake_extra_args="" # space-separated CMake options accumulated by the platform checks below; expanded unquoted in configure()
+include_paths="" # extra include directories, later passed as CMAKE_INCLUDE_PATH
+
+if [[ $bb_full_target == *cxx11* ]]; then # cxx11 string-ABI builds request the C++11 ABI explicitly
+    cmake_extra_args+="-DGLIBCXX_USE_CXX11_ABI=1 "
+fi
+
+if [[ $target == i686-linux-gnu* # Pick the BLAS backend per platform: MKL first, then BLIS, then OpenBLAS
+    || $target == x86_64-linux-gnu*
+    || $target == x86_64-apple-darwin*
+]]; then
+    cmake_extra_args+="-DBLAS=MKL "
+elif [[ $target == aarch64-linux-gnu*
+    || $bb_full_target == armv7l-linux-gnu* # matched on $bb_full_target; presumably $target does not distinguish armv7l from armv6l - confirm
+    || $target == x86_64-linux-musl*
+    || $target == x86_64-unknown-freebsd*
+    || $target == aarch64-apple-darwin*
+    || $target == i686-w64-mingw32*
+    || $target == x86_64-w64-mingw32*
+]]; then
+    cmake_extra_args+="-DBLAS=BLIS "
+elif [[ $bb_full_target == armv6l-linux-gnu* ]]; then # preferred BLAS for armv6l-linux-gnu
+    cmake_extra_args+="-DBLAS=OpenBLAS "
+fi
+
+if [[ $target == x86_64* ]]; then # Restricting PYTORCH_QNNPACK to x86_64: Adapted from https://salsa.debian.org/deeplearning-team/pytorch/-/blob/master/debian/rules
+    cmake_extra_args+="-DUSE_PYTORCH_QNNPACK=ON "
+else
+    cmake_extra_args+="-DUSE_PYTORCH_QNNPACK=OFF "
+fi
+
+if [[ $target == aarch64-linux-gnu* # Disabled use of breakpad on aarch64-linux-gnu: Fails to build embedded breakpad library.
+    || $target == *-w64-mingw32* # Disabling breakpad enables configure on Windows - in combination with pytorch-aten-qnnpack-cmake-windows.patch
+    || $target == *-freebsd*
+]]; then
+    cmake_extra_args+="-DUSE_BREAKPAD=OFF "
+else
+    cmake_extra_args+="-DUSE_BREAKPAD=ON "
+fi
+
+if [[ $target == *-linux-musl* # Disabled use of TensorPipe on linux-musl: Fails to build embedded TensorPipe library.
+    || $target == *-w64-mingw32* # TensorPipe cannot be used on Windows
+]]; then
+    cmake_extra_args+="-DUSE_TENSORPIPE=OFF "
+else
+    cmake_extra_args+="-DUSE_TENSORPIPE=ON "
+fi
+
+if [[ $target == *-w64-* || $target == *-freebsd* ]]; then # Kineto is switched off on Windows and FreeBSD; on other targets USE_KINETO is not passed at all
+    cmake_extra_args+="-DUSE_KINETO=OFF "
+fi
+
+# Gloo is only available for 64-bit x86_64 or aarch64 - and cmake currently cannot find Gloo on *-linux-gnu
+if [[ $target != arm-* && $target == *-linux-musl* ]]; then # so only non-ARM linux-musl targets use the system Gloo
+    cmake_extra_args+="-DUSE_SYSTEM_GLOO=ON "
+fi
+
+if [[ $target == aarch64-* # A compiler with AVX512 support is required for FBGEMM
+    || $target == arm-* # A compiler with AVX512 support is required for FBGEMM
+    || $target == i686-* # x64 operating system is required for FBGEMM
+    || $target == x86_64-w64-mingw32* # FBGEMM is also disabled on 64-bit Windows
+]]; then
+    cmake_extra_args+="-DUSE_FBGEMM=OFF -DUSE_FAKELOWP=OFF "
+fi
+
+if [[ $target == x86_64-apple-darwin* # Fails to compile: /workspace/srcdir/pytorch/third_party/ideep/mkl-dnn/src/cpu/x64/jit_avx512_core_amx_conv_kernel.cpp:483:43: error: use of undeclared identifier 'noU';
+    || $target == *-w64-mingw32*
+    || $target == *-linux-musl* ]]; then
+    cmake_extra_args+="-DUSE_MKLDNN=OFF "
+fi
+
+if [[ $bb_full_target == *cuda* ]]; then # CUDA build: point CMake at the matching CUDA_full toolkit unpacked under srcdir
+    cuda_version=$(echo $bb_full_target | sed -E -e 's/.*cuda\+([0-9]+\.[0-9]+).*/\1/') # major.minor taken from the cuda+X.Y platform tag
+    cuda_version_major=$(echo $cuda_version | cut -d . -f 1)
+    cuda_version_minor=$(echo $cuda_version | cut -d . -f 2)
+    cuda_full_path="$WORKSPACE/srcdir/CUDA_full.v$cuda_version/cuda" # matches the unpack_target of the CUDA_full sources
+    export PATH=$PATH:$cuda_full_path/bin
+    export CUDACXX=$cuda_full_path/bin/nvcc # lets CMake find the CUDA compiler
+    export CUDAHOSTCXX=$CXX
+    mkdir $WORKSPACE/tmpdir # scratch directory handed to nvcc through TMPDIR
+    export TMPDIR=$WORKSPACE/tmpdir
+    cmake_extra_args+="\
+        -DUSE_CUDA=ON \
+        -DUSE_CUDNN=ON \
+        -DUSE_MAGMA=ON \
+        -DCUDA_TOOLKIT_ROOT_DIR=$cuda_full_path \
+        -DCUDA_CUDART_LIBRARY=$cuda_full_path/lib64/libcudart.$dlext \
+        -DCMAKE_CUDA_FLAGS=-cudart=shared \
+        -DCUDA_cublas_LIBRARY=$cuda_full_path/lib64/libcublas.$dlext \
+        -DCUDA_cufft_LIBRARY=$cuda_full_path/lib64/libcufft.$dlext \
+        -DCUDA_curand_LIBRARY=$cuda_full_path/lib64/libcurand.$dlext \
+        -DCUDA_cusolver_LIBRARY=$cuda_full_path/lib64/libcusolver.$dlext \
+        -DCUDA_cusparse_LIBRARY=$cuda_full_path/lib64/libcusparse.$dlext \
+        -DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include \
+        -DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/pytorch/third_party/cub "
+    include_paths+=":$cuda_full_path/include" # NOTE: cmake_extra_args is expanded unquoted, so its values must not contain spaces or quote characters (hence -cudart=shared rather than a quoted '-cudart shared')
+    micromamba install -y magma-cuda${cuda_version_major}${cuda_version_minor} -c pytorch # MAGMA package named after the CUDA version digits
+    git submodule update --init \
+        third_party/cub \
+        third_party/cudnn_frontend
+else # CPU-only build
+    cmake_extra_args+="-DUSE_CUDA=OFF -DUSE_MAGMA=OFF "
+fi
+
+git submodule update --init \
+    third_party/FP16 \
+    third_party/FXdiv \
+    third_party/eigen \
+    third_party/fbgemm \
+    third_party/fmt \
+    third_party/foxi \
+    third_party/gloo \
+    third_party/kineto \
+    third_party/onnx \
+    third_party/psimd \
+    third_party/tensorpipe # these submodules are initialised without --recursive
+git submodule update --init --recursive \
+    third_party/breakpad \
+    third_party/ideep # breakpad and ideep are fetched together with their nested submodules
+cd third_party/fbgemm && git submodule update --init third_party/asmjit && cd ../.. # only asmjit is fetched inside fbgemm
+cd third_party/tensorpipe && git submodule update --init third_party/libnop third_party/libuv && cd ../.. # only libnop and libuv are fetched inside tensorpipe
+mkdir build # out-of-tree build directory; configure() points CMake at its parent
+cd build
+configure() { # Runs the CMake configure step on the parent directory with the shared options plus the accumulated $cmake_extra_args
+    cmake \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_INSTALL_PREFIX=$prefix \
+        -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TARGET_TOOLCHAIN} \
+        -DCMAKE_INCLUDE_PATH=$include_paths \
+        -DBUILD_CUSTOM_PROTOBUF=OFF \
+        -DBUILD_PYTHON=OFF \
+        -DPYTHON_EXECUTABLE=`which python3` \
+        -DBUILD_SHARED_LIBS=ON \
+        -DHAVE_SOVERSION=ON \
+        -DUSE_METAL=OFF \
+        -DUSE_MPI=OFF \
+        -DUSE_NCCL=OFF \
+        -DUSE_NNPACK=OFF \
+        -DUSE_NUMA=OFF \
+        -DUSE_NUMPY=OFF \
+        -DUSE_QNNPACK=OFF \
+        -DUSE_SYSTEM_CPUINFO=ON \
+        -DUSE_SYSTEM_PTHREADPOOL=ON \
+        -DUSE_SYSTEM_SLEEF=ON \
+        -DUSE_SYSTEM_XNNPACK=ON \
+        -DPROTOBUF_PROTOC_EXECUTABLE=$host_bindir/protoc \
+        -DCAFFE2_CUSTOM_PROTOC_EXECUTABLE=$host_bindir/protoc \
+        -Wno-dev \
+        $cmake_extra_args \
+        ..
+} # $cmake_extra_args is intentionally unquoted so that it splits into separate options
+if [[ $bb_full_target != *cuda* ]]; then # CPU-only builds configure once
+    configure
+else # CUDA builds configure repeatedly; per the commit notes this is a hack to get past TRY_RUN for CUDA
+    configure
+    configure || configure # second pass, retried once if it fails
+fi
+cmake --build . -- -j $nproc
+make install
+install_license ../LICENSE
 """
 
 # These are the platforms we will build for by default, unless further
 # platforms are passed in on the command line
-platforms = [
-    Platform("x86_64", "linux"; libc="glibc", cxxstring_abi = "cxx11"),
+platforms = supported_platforms() # start from every supported platform, then drop the ones that fail to build
+filter!(p -> !(Sys.islinux(p) && libc(p) == "musl"), platforms) # musl fails due to conflicting declaration of C function ‘void __assert_fail(const char*, const char*, int, const char*)’ - between /opt/x86_64-linux-musl/x86_64-linux-musl/include/c++/8.1.0/cassert:44 and /opt/x86_64-linux-musl/x86_64-linux-musl/sys-root/usr/include/assert.h
+filter!(!Sys.iswindows, platforms) # ONNX does not support cross-compiling for w64-mingw32 on linux
+filter!(p -> arch(p) != "armv6l", platforms) # armv6l is not supported by XNNPACK
+filter!(p -> arch(p) != "armv7l", platforms) # armv7l is not supported by XNNPACK
+filter!(p -> arch(p) != "powerpc64le", platforms) # PowerPC64LE is not supported by XNNPACK
+filter!(!Sys.isfreebsd, platforms) # Build fails: Clang v12 crashes compiling aten/src/ATen/native/cpu/MaxUnpoolKernel.cpp.
+
+mkl_platforms = [ # platforms that receive the MKL_jll / MKL_Headers_jll dependencies
+    Platform("x86_64", "Linux"),
+    Platform("i686", "Linux"),
+    Platform("x86_64", "MacOS"),
+    Platform("x86_64", "Windows"), # NOTE(review): the build script passes -DBLAS=BLIS for x86_64-w64-mingw32, and listing Windows here also removes it from blis_platforms; harmless while Windows is filtered out of platforms - confirm before re-enabling Windows
 ]
 
+blis_platforms = filter(p -> p ∉ mkl_platforms, [ # BLIS candidates, minus any platform already listed in mkl_platforms
+    Platform("x86_64", "linux"; libc = "glibc"),
+    Platform("aarch64", "linux"; libc = "glibc"),
+    Platform("armv7l", "linux"; call_abi = "eabihf", libc = "glibc"),
+    Platform("x86_64", "linux"; libc = "musl"),
+    Platform("x86_64", "freebsd"),
+    Platform("aarch64", "macos"),
+    Platform("x86_64", "macos"),
+    Platform("x86_64", "windows"),
+])
+
+openblas_platforms = filter(p -> p ∉ union(mkl_platforms, blis_platforms), platforms) # remaining build platforms fall back to OpenBLAS + LAPACK; computed before the CUDA platforms are appended
+
+cuda_platforms = [ # CUDA-tagged variants of x86_64 Linux, built in addition to the CPU-only platforms
+    Platform("x86_64", "Linux"; cuda = "10.2"),
+    Platform("x86_64", "Linux"; cuda = "11.3"),
+]
+for p in cuda_platforms # append the CUDA variants to the list of platforms to build
+    push!(platforms, p)
+end
+
+platforms = expand_cxxstring_abis(platforms) # expand every platform list over the C++ string ABIs, so the dependency platform lists stay in step with platforms
+mkl_platforms = expand_cxxstring_abis(mkl_platforms)
+blis_platforms = expand_cxxstring_abis(blis_platforms)
+openblas_platforms = expand_cxxstring_abis(openblas_platforms)
+cuda_platforms = expand_cxxstring_abis(cuda_platforms)
+
 # The products that we will ensure are always built
 products = [
-    LibraryProduct("libdoeye_caml", :libdoeye_caml, dont_dlopen = true),
-    LibraryProduct("libtorch", :libtorch, dont_dlopen = true),
+    LibraryProduct(["libtorch", "torch"], :libtorch), # accepts either library name, with or without the lib prefix
+    LibraryProduct(["libtorch_cpu", "torch_cpu"], :libtorch_cpu), # additional library product for the CPU backend
 ]
 
 # Dependencies that must be installed before this package can be built
 dependencies = [
-    BuildDependency(PackageSpec(name="CUDA_full_jll", version=v"10.1.243")),
-    Dependency(PackageSpec(name="CompilerSupportLibraries_jll")),
+    Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
+    Dependency("blis_jll"; platforms = blis_platforms), # BLAS provider on blis_platforms
+    Dependency("CPUInfo_jll"; compat = "0.0.20201217"), # used via -DUSE_SYSTEM_CPUINFO=ON
+    Dependency("CUDNN_jll", v"8.2.4"; compat = "8", platforms = cuda_platforms), # built against v8.2.4; only on the CUDA platforms
+    Dependency("Gloo_jll";  compat = "0.0.20210521", platforms = filter(p -> nbits(p) == 64, platforms)), # 64-bit platforms only
+    Dependency("LAPACK_jll"; platforms = openblas_platforms), # LAPACK is restricted to the OpenBLAS platforms
+    Dependency("MKL_jll"; platforms = mkl_platforms),
+    BuildDependency("MKL_Headers_jll"; platforms = mkl_platforms), # headers are needed at build time only
+    Dependency("OpenBLAS_jll"; platforms = openblas_platforms),
+    Dependency("PThreadPool_jll"; compat = "0.0.20210414"), # used via -DUSE_SYSTEM_PTHREADPOOL=ON
+    Dependency("SLEEF_jll", v"3.5.2"; compat = "3"), # used via -DUSE_SYSTEM_SLEEF=ON
+#    Dependency("TensorRT_jll"; platforms = cuda_platforms), # Building with TensorRT is not supported: https://github.com/pytorch/pytorch/issues/60228
+    Dependency("XNNPACK_jll"; compat = "0.0.20210622"), # used via -DUSE_SYSTEM_XNNPACK=ON
+    BuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")), # protoc pinned to v3.13.0 as a build-time dependency
+    HostBuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")), # same protoc version for the build host; the script passes $host_bindir/protoc to CMake
 ]
 
 # Build the tarballs, and possibly a `build.jl` as well.
-build_tarballs(ARGS, name, version, sources, script, platforms, products, dependencies; preferred_gcc_version = v"7.1.0")
+build_tarballs(ARGS, name, version, sources, script, platforms, products, dependencies; # run the build for every platform in platforms
+    preferred_gcc_version = v"8", # raised from the previous v7.1.0
+    julia_compat = "1.6") # Julia compat bound passed to build_tarballs
diff --git a/T/Torch/bundled/patches/pytorch-aten-qnnpack-cmake-windows.patch b/T/Torch/bundled/patches/pytorch-aten-qnnpack-cmake-windows.patch
@@ -0,0 +1,13 @@
+diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
+index 3901f735a4..5a742c793d 100644
+--- a/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
++++ b/aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
+@@ -61,7 +61,7 @@ endif()
+
+ if(NOT CMAKE_SYSTEM_NAME)
+   message(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
+-elseif(NOT CMAKE_SYSTEM_NAME MATCHES "^(Darwin|Linux|Android)$")
++elseif(NOT CMAKE_SYSTEM_NAME MATCHES "^(Darwin|Linux|Android|Windows)$")
+   message(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME = ${CMAKE_SYSTEM_NAME}")
+ endif()
+
diff --git a/T/Torch/bundled/patches/pytorch-cmake-find-cuda.patch b/T/Torch/bundled/patches/pytorch-cmake-find-cuda.patch
@@ -0,0 +1,13 @@
+diff --git a/cmake/public/cuda.cmake b/cmake/public/cuda.cmake
+index 7ba2bb6d4c..476f65f99c 100644
+--- a/cmake/public/cuda.cmake
++++ b/cmake/public/cuda.cmake
+@@ -26,7 +26,7 @@ if(NOT MSVC)
+ endif()
+
+ # Find CUDA.
+-find_package(CUDA)
++enable_language(CUDA)
+ if(NOT CUDA_FOUND)
+   message(WARNING
+     "Caffe2: CUDA cannot be found. Depending on whether you are building "