@@ -22,141 +22,162 @@ function(CHECK_SYCL_FLAG FLAG VARIABLE_NAME)
2222 file (REMOVE_RECURSE ${TEMP_DIR} )
2323endfunction ()
2424
# set_build_flags()
#
# Computes the flag sets used for SYCL separate compilation and stores them in
# the CALLER's scope (this is a macro, so every set()/list() below writes
# directly into the scope the macro is invoked from).
#
# Only GCC (Linux) and MSVC (Windows) are supported as host CXX compilers at
# the moment. For any other compiler a message is printed and return() is
# executed; because this is a macro, that return() leaves the caller's
# function/file, not just the macro.
#
# Inputs read:
#   CMAKE_CXX_COMPILER_ID, CMAKE_BUILD_TYPE, WIN32
#   REPLACE_FLAGS_FOR_SYCLTLA  - selects C++20 and the fixed pvc,bmg AOT setup
#   SYCL_FLAGS, SYCL_LINK_FLAGS, SYCL_INCLUDE_DIR
#   USE_PER_OPERATOR_HEADERS, __INTEL_LLVM_COMPILER
#   TORCH_XPU_ARCH_LIST        - overrides the default AOT target list
#   ENV{MAX_JOBS}              - optional cap for parallel device link jobs
#
# Outputs written (caller's scope):
#   SYCL_HOST_FLAGS, TORCH_XPU_OPS_FLAGS, SYCL_KERNEL_OPTIONS,
#   SYCL_COMPILE_FLAGS, SYCL_DEVICE_LINK_FLAGS, SYCL_TARGETS_OPTION,
#   SYCL_OFFLINE_COMPILER_AOT_OPTIONS, SYCL_OFFLINE_COMPILER_CG_OPTIONS,
#   SYCL_OFFLINE_COMPILER_FLAGS, SYCL_MAX_PARALLEL_LINK_JOBS, AOT_TARGETS
#   TORCH_XPU_ARCH_LIST is written with PARENT_SCOPE, i.e. into the parent of
#   the scope that invokes this macro.
macro(set_build_flags)
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # Reset everything so that repeated invocations do not accumulate flags.
    set(SYCL_HOST_FLAGS)
    set(SYCL_KERNEL_OPTIONS)
    set(SYCL_COMPILE_FLAGS ${SYCL_FLAGS})
    set(SYCL_DEVICE_LINK_FLAGS ${SYCL_LINK_FLAGS})
    set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS)
    set(SYCL_OFFLINE_COMPILER_CG_OPTIONS)
    set(SYCL_OFFLINE_COMPILER_FLAGS)

    # The SYCL-TLA configuration is built as C++20; everything else as C++17.
    if(REPLACE_FLAGS_FOR_SYCLTLA)
      set(CPP_STD c++20)
    else()
      set(CPP_STD c++17)
    endif()

    # -- Host flags (SYCL_CXX_FLAGS)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
      list(APPEND SYCL_HOST_FLAGS /std:${CPP_STD})
      list(APPEND SYCL_HOST_FLAGS /MD)
      list(APPEND SYCL_HOST_FLAGS /EHsc) # exception handling
      # SYCL headers warnings
      list(APPEND SYCL_HOST_FLAGS /wd4996) # allow usage of deprecated functions
      list(APPEND SYCL_HOST_FLAGS /wd4018) # allow signed and unsigned comparison
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
      list(APPEND SYCL_HOST_FLAGS -fPIC)
      list(APPEND SYCL_HOST_FLAGS -std=${CPP_STD})
      list(APPEND SYCL_HOST_FLAGS -Wunused-variable)
      # Some versions of the DPC++ compiler pass paths to SYCL headers as user
      # include paths (`-I`) rather than system paths (`-isystem`). This makes
      # the host compiler report warnings found in the SYCL headers, such as
      # deprecation warnings, even if the warned API is not used by the program.
      # We expect this to be addressed in a later version of the DPC++ compiler.
      # As a workaround we wrap the paths to SYCL headers in `-isystem`.
      foreach(FLAGS IN LISTS SYCL_INCLUDE_DIR)
        list(APPEND SYCL_HOST_FLAGS "-isystem ${FLAGS}")
      endforeach()
      # Excluding warnings which flood the compilation output
      # TODO: fix warnings in the source code and then re-enable them in compilation
      list(APPEND SYCL_HOST_FLAGS -Wno-sign-compare)
    endif()

    # NOTE(review): these GCC-style debug flags are appended for MSVC as well;
    # kept as-is to preserve behavior - confirm whether that is intended.
    if(CMAKE_BUILD_TYPE MATCHES Debug)
      list(APPEND SYCL_HOST_FLAGS -g -fno-omit-frame-pointer -O0)
    elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
      list(APPEND SYCL_HOST_FLAGS -g -O2)
    endif()
    if(USE_PER_OPERATOR_HEADERS)
      list(APPEND SYCL_HOST_FLAGS -DAT_PER_OPERATOR_HEADERS)
    endif()
    list(APPEND SYCL_HOST_FLAGS -D__INTEL_LLVM_COMPILER_VERSION=${__INTEL_LLVM_COMPILER})

    # -- Kernel flags (SYCL_KERNEL_OPTIONS)
    # Fast-math is enabled by default in the SYCL compiler.
    # Refer to [https://clang.llvm.org/docs/UsersManual.html#cmdoption-fno-fast-math]
    # 1. We enable the flags below to be warned about NaN and Infinity,
    #    which would otherwise be hidden by fast-math.
    # 2. The associative-math part of fast-math allows floating point
    #    operations to be reassociated, which leads to non-deterministic
    #    results compared with the CUDA backend.
    # 3. approx-func allows certain math function calls (such as log, sqrt, pow, etc.)
    #    to be replaced with an approximately equivalent set of instructions or
    #    alternative math function calls, which have large errors.
    #
    # PSEUDO of separate compilation with the DPC++ compiler.
    # 1. Kernel source compilation:
    #    icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} ${SYCL_KERNEL_OPTIONS} -fsycl-host-compiler=gcc -fsycl-host-compiler-options='${CMAKE_HOST_FLAGS}' kernel.cpp -o kernel.o
    # 2. Device code linkage:
    #    icpx -fsycl -fsycl-target=${SYCL_TARGETS_OPTION} -fsycl-link ${SYCL_DEVICE_LINK_FLAGS} -Xs '${SYCL_OFFLINE_COMPILER_FLAGS}' kernel.o -o device-code.o
    # 3. Host only source compilation:
    #    gcc ${CMAKE_HOST_FLAGS} host.cpp -o host.o
    # 4. Linkage:
    #    gcc -shared host.o kernel.o device-code.o -o libxxx.so
    list(APPEND SYCL_KERNEL_OPTIONS -fno-sycl-unnamed-lambda)
    list(APPEND SYCL_KERNEL_OPTIONS -sycl-std=2020)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
      list(APPEND SYCL_KERNEL_OPTIONS /fp:strict)
      # Suppress warnings about dllexport.
      list(APPEND SYCL_KERNEL_OPTIONS -Wno-ignored-attributes)
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
      list(APPEND SYCL_KERNEL_OPTIONS -fhonor-nans)
      list(APPEND SYCL_KERNEL_OPTIONS -fhonor-infinities)
      list(APPEND SYCL_KERNEL_OPTIONS -fno-associative-math)
      list(APPEND SYCL_KERNEL_OPTIONS -fno-approx-func)
      list(APPEND SYCL_KERNEL_OPTIONS -Wno-absolute-value)
      list(APPEND SYCL_KERNEL_OPTIONS -no-ftz)
    endif()

    if(CMAKE_BUILD_TYPE MATCHES Debug)
      list(APPEND SYCL_KERNEL_OPTIONS -g -O0 -Rno-debug-disables-optimization)
    elseif(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo)
      list(APPEND SYCL_KERNEL_OPTIONS -gline-tables-only -O2)
    endif()

    CHECK_SYCL_FLAG("-fsycl-fp64-conv-emu" SUPPORTS_FP64_CONV_EMU)
    if(NOT SUPPORTS_FP64_CONV_EMU)
      message(WARNING "The compiler does not support the '-fsycl-fp64-conv-emu' flag, \
      will disable it. On some platforms that don't support FP64, \
      running operations with the FP64 datatype will raise a Runtime error: Required aspect fp64 is not supported on the device \
      or a Native API failed error.")
    endif()

    set(TORCH_XPU_OPS_FLAGS ${SYCL_HOST_FLAGS})

    # -- SYCL device object linkage flags
    include(ProcessorCount)
    ProcessorCount(proc_cnt)
    # ProcessorCount reports 0 when the number of processors cannot be
    # determined; never hand a job count of 0 to the device linker.
    if(proc_cnt EQUAL 0)
      set(proc_cnt 1)
    endif()
    # Honor MAX_JOBS from the environment, but never exceed the processor count.
    if((DEFINED ENV{MAX_JOBS}) AND ("$ENV{MAX_JOBS}" LESS_EQUAL ${proc_cnt}))
      set(SYCL_MAX_PARALLEL_LINK_JOBS $ENV{MAX_JOBS})
    else()
      set(SYCL_MAX_PARALLEL_LINK_JOBS ${proc_cnt})
    endif()
    list(APPEND SYCL_DEVICE_LINK_FLAGS -fsycl-max-parallel-link-jobs=${SYCL_MAX_PARALLEL_LINK_JOBS})
    list(APPEND SYCL_DEVICE_LINK_FLAGS --offload-compress)

    # Offline compiler code-generation options are kept as ONE space-separated
    # string (not a CMake list); it is concatenated into
    # SYCL_OFFLINE_COMPILER_FLAGS below.
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-poison-unsupported-fp64-kernels")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-intel-enable-auto-large-GRF-mode")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-fp32-correctly-rounded-divide-sqrt")
    string(APPEND SYCL_OFFLINE_COMPILER_CG_OPTIONS " -options -cl-intel-greater-than-4GB-buffer-required")

    if(REPLACE_FLAGS_FOR_SYCLTLA)
      # SYCL-TLA: AOT only (no generic spir64 image), fixed device list and the
      # SPIR-V extensions listed below.
      set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen)
      list(APPEND SYCL_KERNEL_OPTIONS ${SYCL_TARGETS_OPTION})
      list(APPEND SYCL_DEVICE_LINK_FLAGS ${SYCL_TARGETS_OPTION})
      list(APPEND SYCL_DEVICE_LINK_FLAGS "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier,+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate")
      set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device pvc,bmg")
    else()
      # Default AOT device list per platform; TORCH_XPU_ARCH_LIST overrides it.
      if(WIN32)
        set(AOT_TARGETS "mtl,mtl-h,bmg,dg2,arl-h,lnl-m")
      else()
        set(AOT_TARGETS "pvc,bmg,dg2,arl-h,mtl-h,lnl-m")
      endif()
      if(TORCH_XPU_ARCH_LIST)
        set(AOT_TARGETS "${TORCH_XPU_ARCH_LIST}")
      endif()
      if(AOT_TARGETS STREQUAL "none")
        # "none" disables AOT: no target/device options are added.
        set(TORCH_XPU_ARCH_LIST "" PARENT_SCOPE)
      else()
        # Enable FP64 conversion emulation for DG2 / ATS-M targets
        if(SUPPORTS_FP64_CONV_EMU)
          string(FIND "${AOT_TARGETS}" "dg2" _dg2_index)
          string(FIND "${AOT_TARGETS}" "ats-m" _atsm_index)
          if(_dg2_index GREATER_EQUAL 0 OR _atsm_index GREATER_EQUAL 0)
            list(APPEND SYCL_KERNEL_OPTIONS -fsycl-fp64-conv-emu)
          endif()
        endif()
        set(SYCL_TARGETS_OPTION -fsycl-targets=spir64_gen,spir64)
        list(APPEND SYCL_KERNEL_OPTIONS ${SYCL_TARGETS_OPTION})
        list(APPEND SYCL_DEVICE_LINK_FLAGS ${SYCL_TARGETS_OPTION})
        set(SYCL_OFFLINE_COMPILER_AOT_OPTIONS "-device ${AOT_TARGETS}")
        set(TORCH_XPU_ARCH_LIST ${AOT_TARGETS} PARENT_SCOPE)
      endif()
      message(STATUS "Compile Intel GPU AOT Targets for ${AOT_TARGETS}")
    endif()

    list(APPEND SYCL_COMPILE_FLAGS ${SYCL_KERNEL_OPTIONS})

    set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
  else()
    message("Not compiling with XPU. Currently only support GCC compiler on Linux and MSVC compiler on Windows as CXX compiler.")
    # In a macro, return() exits the scope that invoked the macro.
    return()
  endif()
endmacro()
0 commit comments