Skip to content

Commit 554c247

Browse files
authored
ggml : remove OpenCL (ggml-org#7735)
ggml-ci
1 parent 0cd6bd3 commit 554c247

21 files changed

+29
-2639
lines changed

.github/workflows/build.yml

+2-34
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,6 @@ jobs:
688688

689689
env:
690690
OPENBLAS_VERSION: 0.3.23
691-
OPENCL_VERSION: 2023.04.17
692-
CLBLAST_VERSION: 1.6.0
693691
SDE_VERSION: 9.33.0-2024-01-07
694692
VULKAN_VERSION: 1.3.261.1
695693

@@ -706,8 +704,6 @@ jobs:
706704
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
707705
- build: 'avx512-x64'
708706
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
709-
- build: 'clblast-x64'
710-
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
711707
- build: 'openblas-x64'
712708
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
713709
- build: 'kompute-x64'
@@ -732,27 +728,6 @@ jobs:
732728
run: |
733729
git submodule update --init kompute
734730
735-
- name: Download OpenCL SDK
736-
id: get_opencl
737-
if: ${{ matrix.build == 'clblast-x64' }}
738-
run: |
739-
curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
740-
mkdir $env:RUNNER_TEMP/opencl
741-
tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
742-
743-
- name: Download CLBlast
744-
id: get_clblast
745-
if: ${{ matrix.build == 'clblast-x64' }}
746-
run: |
747-
curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z"
748-
curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
749-
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z
750-
rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast
751-
foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
752-
$txt = Get-Content -Path $f -Raw
753-
$txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
754-
}
755-
756731
- name: Download OpenBLAS
757732
id: get_openblas
758733
if: ${{ matrix.build == 'openblas-x64' }}
@@ -786,13 +761,6 @@ jobs:
786761
cmake -S . -B build ${{ matrix.defines }}
787762
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
788763
789-
- name: Add clblast.dll
790-
id: add_clblast_dll
791-
if: ${{ matrix.build == 'clblast-x64' }}
792-
run: |
793-
cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
794-
cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
795-
796764
- name: Add libopenblas.dll
797765
id: add_libopenblas_dll
798766
if: ${{ matrix.build == 'openblas-x64' }}
@@ -816,7 +784,7 @@ jobs:
816784
- name: Test
817785
id: cmake_test
818786
# not all machines have native AVX-512
819-
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'clblast-x64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
787+
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
820788
run: |
821789
cd build
822790
ctest -L main -C Release --verbose --timeout 900
@@ -1071,7 +1039,7 @@ jobs:
10711039
# hypervisor: 'qemu'
10721040
# run: |
10731041
# sudo pkg update
1074-
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
1042+
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
10751043
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
10761044

10771045
release:

CMakeLists.txt

+3-20
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,6 @@ option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for Flas
111111
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
112112
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
113113
option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF)
114-
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
115114
option(LLAMA_VULKAN "llama: use Vulkan" OFF)
116115
option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF)
117116
option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF)
@@ -502,22 +501,6 @@ if (LLAMA_RPC)
502501
set(GGML_SOURCES_RPC ggml-rpc.cpp)
503502
endif()
504503

505-
if (LLAMA_CLBLAST)
506-
find_package(CLBlast)
507-
if (CLBlast_FOUND)
508-
message(STATUS "CLBlast found")
509-
510-
set(GGML_HEADERS_OPENCL ggml-opencl.h)
511-
set(GGML_SOURCES_OPENCL ggml-opencl.cpp)
512-
513-
add_compile_definitions(GGML_USE_CLBLAST)
514-
515-
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
516-
else()
517-
message(WARNING "CLBlast not found")
518-
endif()
519-
endif()
520-
521504
if (LLAMA_VULKAN)
522505
find_package(Vulkan)
523506
if (Vulkan_FOUND)
@@ -1265,7 +1248,6 @@ add_library(ggml OBJECT
12651248
ggml-quants.c
12661249
ggml-quants.h
12671250
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1268-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
12691251
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
12701252
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
12711253
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
@@ -1353,8 +1335,9 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
13531335
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
13541336

13551337
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
1356-
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1357-
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
1338+
"${GGML_HEADERS_CUDA}"
1339+
"${GGML_HEADERS_METAL}"
1340+
"${GGML_HEADERS_EXTRA}")
13581341

13591342
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
13601343
install(TARGETS ggml PUBLIC_HEADER)

Makefile

-17
Original file line numberDiff line numberDiff line change
@@ -547,23 +547,6 @@ ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h
547547
$(NVCC_COMPILE)
548548
endif # LLAMA_CUDA
549549

550-
ifdef LLAMA_CLBLAST
551-
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
552-
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
553-
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
554-
555-
# Mac provides OpenCL as a framework
556-
ifeq ($(UNAME_S),Darwin)
557-
MK_LDFLAGS += -lclblast -framework OpenCL
558-
else
559-
MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL)
560-
endif
561-
OBJS += ggml-opencl.o
562-
563-
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
564-
$(CXX) $(CXXFLAGS) -c $< -o $@
565-
endif # LLAMA_CLBLAST
566-
567550
ifdef LLAMA_VULKAN
568551
MK_CPPFLAGS += -DGGML_USE_VULKAN
569552
MK_LDFLAGS += -lvulkan

README-sycl.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based o
2929

3030
When targeting **Intel CPU**, it is recommended to use llama.cpp for [Intel oneMKL](README.md#intel-onemkl) backend.
3131

32-
It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, CLBlast etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
32+
It has a similar design to other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, etc.* In the initial work, oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
3333

3434
## News
3535

README.md

+3-113
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ variety of hardware - locally and in the cloud.
7777
- AVX, AVX2 and AVX512 support for x86 architectures
7878
- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use
7979
- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP)
80-
- Vulkan, SYCL, and (partial) OpenCL backend support
80+
- Vulkan and SYCL backend support
8181
- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity
8282

8383
Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has
@@ -371,16 +371,11 @@ In order to build llama.cpp you have four different options.
371371
3. Install compilation dependencies.
372372

373373
```bash
374-
sudo pkg install gmake automake autoconf pkgconf llvm15 clinfo clover \
375-
opencl clblast openblas
374+
sudo pkg install gmake automake autoconf pkgconf llvm15 openblas
376375
377376
gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4
378377
```
379378

380-
**Notes:** With this packages you can build llama.cpp with OPENBLAS and
381-
CLBLAST support for use OpenCL GPU acceleration in FreeBSD. Please read
382-
the instructions for use and activate this options in this document below.
383-
384379
### Homebrew
385380

386381
On Mac and Linux, the homebrew package manager can be used via
@@ -399,7 +394,7 @@ argument.
399394
400395
### BLAS Build
401396
402-
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:
397+
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS and hipBLAS. There are currently several different BLAS implementations available for build and use:
403398
404399
- #### Accelerate Framework:
405400
@@ -553,111 +548,6 @@ Building the program with BLAS support may lead to some performance improvements
553548
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
554549
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
555550
556-
- #### CLBlast
557-
558-
OpenCL acceleration is provided by the matrix multiplication kernels from the [CLBlast](https://github.com/CNugteren/CLBlast) project and custom kernels for ggml that can generate tokens on the GPU.
559-
560-
You will need the [OpenCL SDK](https://github.com/KhronosGroup/OpenCL-SDK).
561-
- For Ubuntu, Debian, and Fedora the packages `opencl-headers`, `ocl-icd` may be needed.
562-
563-
- For Windows, a pre-built SDK is available on the [OpenCL Releases](https://github.com/KhronosGroup/OpenCL-SDK/releases) page.
564-
565-
- <details>
566-
<summary>Installing the OpenCL SDK from source</summary>
567-
568-
```sh
569-
git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git
570-
cd OpenCL-SDK
571-
cmake -B build -DBUILD_DOCS=OFF \
572-
-DBUILD_EXAMPLES=OFF \
573-
-DBUILD_TESTING=OFF \
574-
-DOPENCL_SDK_BUILD_SAMPLES=OFF \
575-
-DOPENCL_SDK_TEST_SAMPLES=OFF
576-
cmake --build build
577-
cmake --install build --prefix /some/path
578-
```
579-
</details>
580-
581-
##### Installing CLBlast
582-
583-
Pre-built CLBlast binaries may be found on the [CLBlast Releases](https://github.com/CNugteren/CLBlast/releases) page. For Unix variants, it may also be found in your operating system's packages.
584-
585-
Linux packaging:
586-
Fedora Linux:
587-
```bash
588-
sudo dnf install clblast
589-
```
590-
591-
Alternatively, they may be built from source.
592-
593-
- <details>
594-
<summary>Windows:</summary>
595-
596-
```cmd
597-
set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64"
598-
git clone https://github.com/CNugteren/CLBlast.git
599-
cd CLBlast
600-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64
601-
cmake --build build --config Release
602-
cmake --install build --prefix C:/CLBlast
603-
```
604-
605-
(note: `--config Release` at build time is the default and only relevant for Visual Studio builds - or multi-config Ninja builds)
606-
607-
- <details>
608-
<summary>Unix:</summary>
609-
610-
```sh
611-
git clone https://github.com/CNugteren/CLBlast.git
612-
cd CLBlast
613-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF
614-
cmake --build build --config Release
615-
cmake --install build --prefix /some/path
616-
```
617-
618-
Where `/some/path` is where the built library will be installed (default is `/usr/local`).
619-
</details>
620-
621-
##### Building Llama with CLBlast
622-
623-
- Build with make:
624-
```sh
625-
make LLAMA_CLBLAST=1
626-
```
627-
- CMake (Unix):
628-
```sh
629-
cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path
630-
cmake --build build --config Release
631-
```
632-
- CMake (Windows):
633-
```cmd
634-
set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast"
635-
git clone https://github.com/ggerganov/llama.cpp
636-
cd llama.cpp
637-
cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64
638-
cmake --build build --config Release
639-
cmake --install build --prefix C:/LlamaCPP
640-
```
641-
642-
##### Running Llama with CLBlast
643-
644-
The CLBlast build supports `--gpu-layers|-ngl` like the CUDA version does.
645-
646-
To select the correct platform (driver) and device (GPU), you can use the environment variables `GGML_OPENCL_PLATFORM` and `GGML_OPENCL_DEVICE`.
647-
The selection can be a number (starting from 0) or a text string to search:
648-
649-
```sh
650-
GGML_OPENCL_PLATFORM=1 ./main ...
651-
GGML_OPENCL_DEVICE=2 ./main ...
652-
GGML_OPENCL_PLATFORM=Intel ./main ...
653-
GGML_OPENCL_PLATFORM=AMD GGML_OPENCL_DEVICE=1 ./main ...
654-
```
655-
656-
The default behavior is to find the first GPU device, but when it is an integrated GPU on a laptop, for instance, the selectors are useful.
657-
Using the variables it is possible to select a CPU-based driver as well, if so desired.
658-
659-
You can get a list of platforms and devices from the `clinfo -l` command, etc.
660-
661551
- #### Vulkan
662552
663553
**With docker**:

common/common.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -2844,7 +2844,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
28442844
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
28452845
fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false");
28462846
fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false");
2847-
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
28482847
fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
28492848
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
28502849
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");

0 commit comments

Comments
 (0)