diff --git a/T/Torch/build_tarballs.jl b/T/Torch/build_tarballs.jl
index 1df37184033..cd12c10c7f4 100644
--- a/T/Torch/build_tarballs.jl
+++ b/T/Torch/build_tarballs.jl
@@ -10,7 +10,8 @@ sources = [
     GitSource("https://github.com/pytorch/pytorch.git", "71f889c7d265b9636b93ede9d651c0a9c4bee191"),
     FileSource("https://micromamba.snakepit.net/api/micromamba/linux-64/0.21.1", "c907423887b43bec4e8b24f17471262c8087b7095683f41dcef4a4e24e9a3bbd"; filename = "micromamba.tar.bz2"),
     ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v10.2.89%2B5/CUDA_full.v10.2.89.x86_64-linux-gnu.tar.gz", "60e6f614db3b66d955b7e6aa02406765e874ff475c69e2b4a04eb95ba65e4f3b"; unpack_target = "CUDA_full.v10.2"),
-    ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v11.3.1%2B1/CUDA_full.v11.3.1.x86_64-linux-gnu.tar.gz", "9ae00d36d39b04e8e99ace63641254c93a931dcf4ac24c8eddcdfd4625ab57d6"; unpack_target = "CUDA_full.v11.3"),
+    ArchiveSource("https://github.com/JuliaBinaryWrappers/CUDA_full_jll.jl/releases/download/CUDA_full-v11.3.1%2B0/CUDA_full.v11.3.1.x86_64-linux-gnu.tar.gz", "4094b8f1a3667166c1281faa7958cc46dbdc86ac86979e86d462a3c58f2a4b54"; unpack_target = "CUDA_full.v11.3"),
+    GitSource("https://github.com/NVIDIA/thrust.git", "bdedc53ec19488704ba1461a79f6cd8d785fcc3e"), # Thrust v1.11.0 matches CUDA 11.3
     DirectorySource("./bundled"),
 ]
@@ -34,10 +35,6 @@ atomic_patch -p1 ../patches/pytorch-aten-qnnpack-cmake-windows.patch
 cmake_extra_args=""
 include_paths=""
 
-if [[ $bb_full_target == *cxx11* ]]; then
-    cmake_extra_args+="-DGLIBCXX_USE_CXX11_ABI=1 "
-fi
-
 if [[ $target == i686-linux-gnu*
     || $target == x86_64-linux-gnu*
     || $target == x86_64-apple-darwin*
@@ -56,22 +53,19 @@ elif [[ $bb_full_target == armv6l-linux-gnu* ]]; then
     cmake_extra_args+="-DBLAS=OpenBLAS "
 fi
 
-if [[ $target == x86_64* ]]; then # Restricting PYTORCH_QNNPACK to x86_64: Adapted from https://salsa.debian.org/deeplearning-team/pytorch/-/blob/master/debian/rules
+if [[ $target == x86_64* ]]; then
     cmake_extra_args+="-DUSE_PYTORCH_QNNPACK=ON "
 else
     cmake_extra_args+="-DUSE_PYTORCH_QNNPACK=OFF "
 fi
 
-if [[ $target == aarch64-linux-gnu* # Disabled use of breakpad on aarch64-linux-gnu: Fails to build embedded breakpad library.
-    || $target == *-w64-mingw32* # Disabling breakpad enables configure on Windows - in combination with pytorch-aten-qnnpack-cmake-windows.patch
-    || $target == *-freebsd*
-]]; then
+if [[ $target == aarch64-linux-gnu* || $target == *-w64-mingw32* || $target == *-freebsd* ]]; then
     cmake_extra_args+="-DUSE_BREAKPAD=OFF "
 else
     cmake_extra_args+="-DUSE_BREAKPAD=ON "
 fi
 
-if [[ $target == *-linux-musl* # Disabled use of TensorPipe on linux-musl: Fails to build embedded TensorPipe library.
+if [[ $target == *-linux-musl* || $target == *-w64-mingw32* # TensorPipe cannot be used on Windows
 ]]; then
     cmake_extra_args+="-DUSE_TENSORPIPE=OFF "
@@ -88,9 +82,9 @@ if [[ $target != arm-* && $target == *-linux-musl* ]]; then
     cmake_extra_args+="-DUSE_SYSTEM_GLOO=ON "
 fi
 
-if [[ $target == aarch64-* # A compiler with AVX512 support is required for FBGEM
-    || $target == arm-* # A compiler with AVX512 support is required for FBGEM
-    || $target == i686-* # x64 operating system is required for FBGEMM
+if [[ $target == aarch64-linux-* # A compiler with AVX512 support is required for FBGEMM
+    || $target == arm-linux-* # A compiler with AVX512 support is required for FBGEMM
+    || $target == i686-linux-* # x64 operating system is required for FBGEMM
     || $target == x86_64-w64-mingw32*
 ]]; then
     cmake_extra_args+="-DUSE_FBGEMM=OFF -DUSE_FAKELOWP=OFF "
@@ -110,7 +104,7 @@ if [[ $bb_full_target == *cuda* ]]; then
     apk del cmake
     apk add 'cmake<3.17' --repository=http://dl-cdn.alpinelinux.org/alpine/v3.11/main
     export PATH=$PATH:$cuda_full_path/bin
-    export CUDACXX=$cuda_full_path/bin/nvcc
+    export CUDACXX="ccache nvcc" # Compile CUDA sources through ccache
     export CUDAHOSTCXX=$CXX
     mkdir $WORKSPACE/tmpdir
     export TMPDIR=$WORKSPACE/tmpdir
@@ -125,13 +119,23 @@ if [[ $bb_full_target == *cuda* ]]; then
         -DCUDA_cufft_LIBRARY=$cuda_full_path/lib64/libcufft.$dlext \
         -DCUDA_curand_LIBRARY=$cuda_full_path/lib64/libcurand.$dlext \
         -DCUDA_cusolver_LIBRARY=$cuda_full_path/lib64/libcusolver.$dlext \
-        -DCUDA_cusparse_LIBRARY=$cuda_full_path/lib64/libcusparse.$dlext \
-        -DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include \
-        -DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/pytorch/third_party/cub "
+        -DCUDA_cusparse_LIBRARY=$cuda_full_path/lib64/libcusparse.$dlext "
     include_paths+=":$cuda_full_path/include"
+    if [[ $bb_full_target == *cuda+11.3* ]]; then # HACK: workaround for Thrust missing from CUDA_full 11.3
+        cd $WORKSPACE/srcdir/thrust && git config --file=.gitmodules submodule.cub.url https://github.com/NVIDIA/cub.git && git submodule update --init --recursive && cd $WORKSPACE/srcdir/pytorch
+        cmake_extra_args+="\
+            -DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include;$WORKSPACE/srcdir/thrust;$WORKSPACE/srcdir/thrust/dependencies/cub \
+            -DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/thrust/dependencies/cub "
+        include_paths+=":$WORKSPACE/srcdir/thrust"
+        include_paths+=":$WORKSPACE/srcdir/thrust/dependencies/cub"
+    else
+        git submodule update --init third_party/cub
+        cmake_extra_args+="\
+            -DCUDA_TOOLKIT_INCLUDE=$includedir;$cuda_full_path/include \
+            -DCUB_INCLUDE_DIR=$WORKSPACE/srcdir/pytorch/third_party/cub "
+    fi
     micromamba install -y magma-cuda${cuda_version_major}${cuda_version_minor} -c pytorch
     git submodule update --init \
-        third_party/cub \
         third_party/cudnn_frontend
 else
     cmake_extra_args+="-DUSE_CUDA=OFF -DUSE_MAGMA=OFF "
@@ -200,9 +204,8 @@ install_license ../LICENSE
 platforms = supported_platforms()
 filter!(p -> !(Sys.islinux(p) && libc(p) == "musl"), platforms) # musl fails due to conflicting declaration of C function ‘void __assert_fail(const char*, const char*, int, const char*)’ between /opt/x86_64-linux-musl/x86_64-linux-musl/include/c++/8.1.0/cassert:44 and /opt/x86_64-linux-musl/x86_64-linux-musl/sys-root/usr/include/assert.h
 filter!(!Sys.iswindows, platforms) # ONNX does not support cross-compiling for w64-mingw32 on linux
-filter!(p -> arch(p) != "armv6l", platforms) # armv6l is not supported by XNNPACK
-filter!(p -> arch(p) != "armv7l", platforms) # armv7l is not supported by XNNPACK
 filter!(p -> arch(p) != "powerpc64le", platforms) # PowerPC64LE is not supported by XNNPACK
+filter!(p -> !(Sys.isapple(p) && arch(p) == "aarch64"), platforms) # aarch64-apple is not supported by CPUInfo_jll v0.0.20200612, which is referenced by XNNPACK_jll v0.0.20200323
 filter!(!Sys.isfreebsd, platforms) # Build fails: Clang v12 crashes compiling aten/src/ATen/native/cpu/MaxUnpoolKernel.cpp.
 
 mkl_platforms = [
@@ -233,12 +236,6 @@ for p in cuda_platforms
     push!(platforms, p)
 end
 
-platforms = expand_cxxstring_abis(platforms)
-mkl_platforms = expand_cxxstring_abis(mkl_platforms)
-blis_platforms = expand_cxxstring_abis(blis_platforms)
-openblas_platforms = expand_cxxstring_abis(openblas_platforms)
-cuda_platforms = expand_cxxstring_abis(cuda_platforms)
-
 # The products that we will ensure are always built
 products = [
     LibraryProduct(["libtorch", "torch"], :libtorch),
@@ -249,22 +246,22 @@ products = [
 dependencies = [
     Dependency(PackageSpec(name="CompilerSupportLibraries_jll", uuid="e66e0078-7015-5450-92f7-15fbd957f2ae")),
     Dependency("blis_jll"; platforms = blis_platforms),
-    Dependency("CPUInfo_jll"; compat = "0.0.20201217"),
-    Dependency("CUDNN_jll", v"8.2.4"; compat = "8", platforms = cuda_platforms),
-    Dependency("Gloo_jll"; compat = "0.0.20210521", platforms = filter(p -> nbits(p) == 64, platforms)),
+    Dependency("CPUInfo_jll"),
+    Dependency("CUDNN_jll"; platforms = cuda_platforms),
+    Dependency("Gloo_jll"; platforms = filter(p -> nbits(p) == 64, platforms)),
     Dependency("LAPACK_jll"; platforms = openblas_platforms),
     Dependency("MKL_jll"; platforms = mkl_platforms),
     BuildDependency("MKL_Headers_jll"; platforms = mkl_platforms),
     Dependency("OpenBLAS_jll"; platforms = openblas_platforms),
-    Dependency("PThreadPool_jll"; compat = "0.0.20210414"),
-    Dependency("SLEEF_jll", v"3.5.2"; compat = "3"),
+    Dependency("PThreadPool_jll"),
+    Dependency("SLEEF_jll"),
     # Dependency("TensorRT_jll"; platforms = cuda_platforms), # Building with TensorRT is not supported: https://github.com/pytorch/pytorch/issues/60228
-    Dependency("XNNPACK_jll"; compat = "0.0.20210622"),
+    Dependency("XNNPACK_jll", v"0.0.20200323"),
     BuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")),
     HostBuildDependency(PackageSpec("protoc_jll", Base.UUID("c7845625-083e-5bbe-8504-b32d602b7110"), v"3.13.0")),
 ]
 
 # Build the tarballs, and possibly a `build.jl` as well.
 build_tarballs(ARGS, name, version, sources, script, platforms, products, dependencies;
-               preferred_gcc_version = v"7",
+               preferred_gcc_version = v"8",
                julia_compat = "1.6")
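
To smoke-test the updated recipe locally, the usual Yggdrasil/BinaryBuilder invocation should work (a sketch, not part of this change; the target triplet is only an example, and CUDA builds select augmented platforms such as x86_64-linux-gnu-cuda+11.3):

    # Build Torch for a single target, with verbose logs and a debug shell on failure
    julia build_tarballs.jl --verbose --debug x86_64-linux-gnu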