Skip to content

sycl: GGML_SYCL_DISABLE_OPT on by default for all Intel Devices #13973

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/backend/SYCL.md
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
| Name | Value | Function |
|-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
| GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG |
| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features for Intel GPUs. (Recommended to 1 for intel devices older than Gen 10) |
| GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. |
| GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. |
| ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer |
Expand Down
25 changes: 1 addition & 24 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ struct sycl_device_info {
// size_t smpb; // max. shared memory per block
bool vmm; // virtual memory support
size_t total_vram;
sycl_hw_info hw_info;
//sycl_hw_info hw_info; \\ device id and aarch, currently not used
optimize_feature opt_feature;
};

Expand Down Expand Up @@ -288,29 +288,6 @@ struct ggml_tensor_extra_gpu {

void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});

inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
optimize_feature opt;

opt.reorder =
(arch == syclex::architecture::intel_gpu_dg1 ||
arch == syclex::architecture::intel_gpu_acm_g10 ||
arch == syclex::architecture::intel_gpu_acm_g11 ||
arch == syclex::architecture::intel_gpu_acm_g12 ||
arch == syclex::architecture::intel_gpu_pvc ||
arch == syclex::architecture::intel_gpu_pvc_vg ||
arch == syclex::architecture::intel_gpu_mtl_u ||
arch == syclex::architecture::intel_gpu_mtl_s ||
arch == syclex::architecture::intel_gpu_mtl_h ||
arch == syclex::architecture::intel_gpu_arl_u ||
arch == syclex::architecture::intel_gpu_arl_s ||
arch == syclex::architecture::intel_gpu_arl_h ||
arch == syclex::architecture::intel_gpu_bmg_g21 ||
arch == syclex::architecture::intel_gpu_lnl_m
);

return opt;
}

namespace sycl_ex = sycl::ext::oneapi::experimental;
struct ggml_backend_sycl_context {
int device;
Expand Down
6 changes: 2 additions & 4 deletions ggml/src/ggml-sycl/ggml-sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,7 @@ static ggml_sycl_device_info ggml_sycl_init() {

info.devices[i].cc =
100 * prop.get_major_version() + 10 * prop.get_minor_version();
info.devices[i].hw_info = get_device_hw_info(&device);
info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch);

info.devices[i].opt_feature.reorder = !device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu);
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
}

Expand Down Expand Up @@ -195,7 +193,7 @@ static void ggml_check_sycl() try {

if (!initialized) {
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 1);
g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
Expand Down
4 changes: 3 additions & 1 deletion ggml/src/ggml-sycl/sycl_hw.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "sycl_hw.hpp"


// TODO: currently not used
/*
sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
sycl_hw_info res;
int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
Expand All @@ -11,3 +12,4 @@ sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {

return res;
}
*/
3 changes: 3 additions & 0 deletions ggml/src/ggml-sycl/sycl_hw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

namespace syclex = sycl::ext::oneapi::experimental;

// TODO: currently not used
/*
struct sycl_hw_info {
syclex::architecture arch;
int32_t device_id;
Expand All @@ -18,6 +20,7 @@ struct sycl_hw_info {
bool is_in_vector(std::vector<int> &vec, int item);

sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
*/


#endif // SYCL_HW_HPP
Loading