Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build2cmake/src/config/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ pub enum Dependencies {
Cutlass3_8,
#[serde(rename = "cutlass_3_9")]
Cutlass3_9,
#[serde(rename = "cutlass_sycl_3_9")]
CutlassSycl3_9,
Torch,
}

Expand Down
60 changes: 60 additions & 0 deletions build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# CUTLASS-SYCL dependency for the XPU backend.
#
# Prefers a system-provided CutlassSycl package. When none is found, the
# sources are taken either from a local checkout (CUTLASS_SYCL_SRC_DIR, given
# as a CMake variable or as an environment variable) or fetched from GitHub at
# the templated revision. In the fetch/local-checkout path this file also
# rewrites the `sycl_flags` / `sycl_link_flags` variables, which are expected
# to have been set before this file is processed.
find_package(CutlassSycl)

if(NOT CutlassSycl_FOUND)
  set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
  set(CUTLASS_ENABLE_BENCHMARKS OFF CACHE BOOL "Disable CUTLASS Benchmarks")

  # Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case.
  set(CUTLASS_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")

  # Use the specified CUTLASS source directory for compilation if CUTLASS_SYCL_SRC_DIR is provided.
  # The environment variable, when defined, takes precedence over an existing CMake variable.
  if(DEFINED ENV{CUTLASS_SYCL_SRC_DIR})
    set(CUTLASS_SYCL_SRC_DIR "$ENV{CUTLASS_SYCL_SRC_DIR}")
  endif()

  if(CUTLASS_SYCL_SRC_DIR)
    # IS_ABSOLUTE takes a path, not a variable name, so the value must be expanded explicitly.
    if(NOT IS_ABSOLUTE "${CUTLASS_SYCL_SRC_DIR}")
      get_filename_component(CUTLASS_SYCL_SRC_DIR "${CUTLASS_SYCL_SRC_DIR}" ABSOLUTE)
    endif()
    message(STATUS "The CUTLASS_SYCL_SRC_DIR is set, using ${CUTLASS_SYCL_SRC_DIR} for compilation")
    FetchContent_Declare(cutlass SOURCE_DIR "${CUTLASS_SYCL_SRC_DIR}")
  else()
    FetchContent_Declare(
      cutlass
      GIT_REPOSITORY https://github.com/intel/cutlass-sycl.git
      GIT_TAG ${CUTLASS_REVISION}
      GIT_PROGRESS TRUE

      # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
      # Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
      # So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
      GIT_SHALLOW TRUE
    )
  endif()

  # Set Intel backend env
  message(STATUS "Setting Intel GPU optimization env vars for Cutlass-SYCL")

  # Retarget the link flags: swap the generic SPIR-V targets for intel_gpu_pvc and
  # drop the explicit device list.
  string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}")
  string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}")

  # The flags are a ;-separated list. Make sure the existing value ends with a
  # separator so the appended options do not fuse with the last existing flag.
  if(NOT "${sycl_link_flags}" STREQUAL "" AND NOT "${sycl_link_flags}" MATCHES ";$")
    string(APPEND sycl_link_flags ";")
  endif()
  string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier;")

  # Apply the same target swap to the compile flags.
  string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}")

  # Each option is set both as a cache variable (read by CUTLASS' own CMake
  # configuration) and as a preprocessor definition for sources in this directory.
  set(CUTLASS_ENABLE_SYCL ON CACHE BOOL "Enable SYCL for CUTLASS")
  add_compile_definitions(CUTLASS_ENABLE_SYCL=1)
  set(DPCPP_SYCL_TARGET "intel_gpu_pvc" CACHE STRING "SYCL target for Intel GPU")
  add_compile_definitions(DPCPP_SYCL_TARGET=intel_gpu_pvc)
  set(SYCL_INTEL_TARGET ON CACHE BOOL "Enable SYCL for INTEL")
  add_compile_definitions(SYCL_INTEL_TARGET=1)

  # NOTE(review): set(ENV{...}) only changes the environment of the running CMake
  # process (configure time); it is not inherited by the later build step. Confirm
  # that these are only needed while configuring.
  set(ENV{SYCL_PROGRAM_COMPILE_OPTIONS} "-ze-opt-large-register-file")
  set(ENV{IGC_VISAOptions} "-perfmodel")
  set(ENV{IGC_VectorAliasBBThreshold} "10000")
  set(ENV{IGC_ExtraOCLOptions} "-cl-intel-256-GRF-per-thread")

  FetchContent_MakeAvailable(cutlass)

  # Left unquoted on purpose: an unset variable then expands to no argument at all.
  include_directories(${CUTLASS_INCLUDE_DIR})
  include_directories(${CUTLASS_TOOLS_UTIL_INCLUDE_DIR})
else()
  message(STATUS "Using system cutlass with version: ${CutlassSycl_VERSION}")
endif()
14 changes: 10 additions & 4 deletions build2cmake/src/templates/xpu/preamble.cmake
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
cmake_minimum_required(VERSION 3.26)

# Set Intel SYCL compiler before project() call
find_program(ICX_COMPILER icx)
find_program(ICPX_COMPILER icpx)
if(ICPX_COMPILER)
if(ICX_COMPILER AND ICPX_COMPILER)
set(CMAKE_C_COMPILER ${ICX_COMPILER})
set(CMAKE_CXX_COMPILER ${ICPX_COMPILER})
message(STATUS "Using Intel SYCL compiler: ${ICPX_COMPILER}")
message(STATUS "Using Intel SYCL C++ compiler: ${ICPX_COMPILER} and C compiler: ${ICX_COMPILER}")
else()
message(FATAL_ERROR "Intel SYCL compiler (icpx) not found. Please install Intel oneAPI toolkit.")
message(FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit.")
endif()

project({{ name }})

include(FetchContent)
file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")

include("cmake/utils.cmake")

# Find Python with all necessary components for building extensions
Expand Down Expand Up @@ -42,6 +48,6 @@ add_compile_definitions(USE_XPU)

# Set SYCL-specific flags
# Set comprehensive SYCL compilation and linking flags
set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required'")
set(sycl_link_flags "-fsycl;--offload-compress;-fsycl-targets=spir64_gen,spir64;-Xs;-device pvc,xe-lpg,ats-m150 -options ' -cl-intel-enable-auto-large-GRF-mode -cl-poison-unsupported-fp64-kernels -cl-intel-greater-than-4GB-buffer-required';")
set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
message(STATUS "Configuring for Intel XPU backend using SYCL")
3 changes: 3 additions & 0 deletions build2cmake/src/torch/cuda.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,9 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu
.wrap_err("Cannot render CUTLASS dependency template")?;
}
Dependencies::Torch => (),
_ => {
eprintln!("Warning: CUDA backend doesn't need/support dependency: {dep:?}");
},
};
write.write_all(b"\n")?;
}
Expand Down
21 changes: 16 additions & 5 deletions build2cmake/src/torch/xpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ fn write_cmake(

render_preamble(env, name, cmake_writer)?;

render_deps(build, cmake_writer)?;
render_deps(env, build, cmake_writer)?;

render_binding(env, torch, name, cmake_writer)?;

Expand Down Expand Up @@ -187,7 +187,7 @@ fn render_binding(
Ok(())
}

fn render_deps(build: &Build, write: &mut impl Write) -> Result<()> {
fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Result<()> {
let mut deps = HashSet::new();

for kernel in build.kernels.values() {
Expand All @@ -196,11 +196,22 @@ fn render_deps(build: &Build, write: &mut impl Write) -> Result<()> {

for dep in deps {
match dep {
Dependencies::CutlassSycl3_9 => {
env.get_template("xpu/dep-cutlass-sycl.cmake")
.wrap_err("Cannot get CUTLASS-SYCL dependency template")?
.render_to_write(
context! {
version => "3.9-0.3",
},
&mut *write,
)
.wrap_err("Cannot render CUTLASS-SYCL dependency template")?;
}
Dependencies::Torch => (),
_ => {
// XPU doesn't support CUTLASS dependencies yet
eprintln!("Warning: XPU backend doesn't support dependency: {dep:?}");
}
// XPU supports CUTLASS-SYCL instead of CUTLASS
eprintln!("Warning: XPU backend doesn't need/support dependency: {dep:?}");
},
}
write.write_all(b"\n")?;
}
Expand Down
3 changes: 3 additions & 0 deletions lib/deps.nix
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ let
"cutlass_3_9" = [
pkgs.cutlass_3_9
];
"cutlass_sycl_3_9" = [
pkgs.cutlass_sycl_3_9
];
"torch" = [
torch
torch.cxxdev
Expand Down
Loading