
Commit 70c9355

[CI][Benchmarks] update llama.cpp and requirements to latest
This patch updates llama.cpp to the latest available version, uses a new, more relevant GGUF model, and updates oneAPI to 2025.1. I was trying to avoid updating oneAPI, but the latest llama.cpp internal pooling logic seems to be broken on 2025.0, resulting in double-free errors when using older oneAPI components. The utils.download function also had to be updated because it was using a deprecated feature and didn't work on some configurations.
1 parent 0fffee9 commit 70c9355

3 files changed (+30 −24 lines)

Diff for: devops/scripts/benchmarks/benches/llamacpp.py (+22 −12)

```diff
@@ -29,7 +29,7 @@ def git_url(self) -> str:
         return "https://github.com/ggerganov/llama.cpp"
 
     def git_hash(self) -> str:
-        return "1ee9eea094fe5846c7d8d770aa7caa749d246b23"
+        return "916c83bfe7f8b08ada609c3b8e583cf5301e594b"
 
     def setup(self):
         if options.sycl is None:
@@ -47,9 +47,9 @@ def setup(self):
 
         self.model = download(
             self.models_dir,
-            "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf",
-            "Phi-3-mini-4k-instruct-q4.gguf",
-            checksum="fc4f45c9729874a33a527465b2ec78189a18e5726b7121182623feeae38632ace4f280617b01d4a04875acf49d263ee4",
+            "https://huggingface.co/ggml-org/DeepSeek-R1-Distill-Qwen-1.5B-Q4_0-GGUF/resolve/main/deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
+            "deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
+            checksum="791f6091059b653a24924b9f2b9c3141c8f892ae13fff15725f77a2bf7f9b1b6b71c85718f1e9c0f26c2549aba44d191",
         )
 
         self.oneapi = get_oneapi()
@@ -64,10 +64,11 @@ def setup(self):
             f"-DGGML_SYCL=ON",
             f"-DCMAKE_C_COMPILER=clang",
             f"-DCMAKE_CXX_COMPILER=clang++",
-            f"-DDNNL_DIR={self.oneapi.dnn_cmake()}",
+            f"-DDNNL_GPU_VENDOR=INTEL",
             f"-DTBB_DIR={self.oneapi.tbb_cmake()}",
-            f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"',
-            f"-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}",
+            f"-DDNNL_DIR={self.oneapi.dnn_cmake()}",
+            f"-DSYCL_COMPILER=ON",
+            f"-DMKL_DIR={self.oneapi.mkl_cmake()}",
         ]
 
         run(configure_command, add_sycl=True)
@@ -96,14 +97,17 @@ def __init__(self, bench):
     def setup(self):
         self.benchmark_bin = os.path.join(self.bench.build_path, "bin", "llama-bench")
 
+    def model(self):
+        return "DeepSeek-R1-Distill-Qwen-1.5B-Q4_0.gguf"
+
     def name(self):
-        return f"llama.cpp"
+        return f"llama.cpp {self.model()}"
 
     def description(self) -> str:
         return (
             "Performance testing tool for llama.cpp that measures LLM inference speed in tokens per second. "
             "Runs both prompt processing (initial context processing) and text generation benchmarks with "
-            "different batch sizes. Higher values indicate better performance. Uses the Phi-3-mini-4k-instruct "
+            f"different batch sizes. Higher values indicate better performance. Uses the {self.model()} "
             "quantized model and leverages SYCL with oneDNN for acceleration."
         )
 
@@ -122,12 +126,18 @@ def run(self, env_vars) -> list[Result]:
             "128",
             "-p",
             "512",
-            "-b",
-            "128,256,512",
+            "-pg",
+            "0,0",
+            "-sm",
+            "none",
+            "-ngl",
+            "99",
             "--numa",
             "isolate",
             "-t",
-            "56", # TODO: use only as many threads as numa node 0 has cpus
+            "8",
+            "--mmap",
+            "0",
             "--model",
             f"{self.bench.model}",
         ]
```
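
For context, here is an annotated reconstruction of the llama-bench invocation that the updated run() assembles. The binary path and model filename are illustrative placeholders, and the flag descriptions are paraphrased from llama-bench's help text, so treat this as a sketch rather than authoritative documentation:

```python
# Illustrative reconstruction of the new llama-bench command line;
# paths and the model filename are placeholders, not the script's values.
command = [
    "./build/bin/llama-bench",
    "-n", "128",          # tokens generated per text-generation test
    "-p", "512",          # prompt length for the prompt-processing test
    "-pg", "0,0",         # skip the combined prompt+generation test
    "-sm", "none",        # split-mode off: keep the model on a single device
    "-ngl", "99",         # offload up to 99 layers, i.e. the whole model, to the GPU
    "--numa", "isolate",  # restrict execution to the current NUMA node
    "-t", "8",            # fixed CPU thread count instead of the old hardcoded 56
    "--mmap", "0",        # read the model into memory instead of mmap-ing it
    "--model", "deepseek-r1-distill-qwen-1.5b-q4_0.gguf",
]
```

Dropping the old `-b 128,256,512` sweep leaves the batch size at the llama-bench default, so each run now measures a single configuration per test type.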

Diff for: devops/scripts/benchmarks/utils/oneapi.py (+3 −9)

```diff
@@ -16,16 +16,10 @@ def __init__(self):
         Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True)
         self.oneapi_instance_id = self.generate_unique_oneapi_id(self.oneapi_dir)
 
-        # can we just hardcode these links?
         self.install_package(
-            "dnnl",
-            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh",
-            "6866feb5b8dfefd6ff45d6bfabed44f01d7fba8fd452480ae1fd86b92e9481ae052c24842da14f112f672f5c4859945b",
-        )
-        self.install_package(
-            "mkl",
-            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh",
-            "122bb84cf943ea27753cb399c81ab2ae218ebd51b789c74d273240157722925ab4d5a43cb0b5de41b854f2c5a59a4002",
+            "base",
+            "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/cca951e1-31e7-485e-b300-fe7627cb8c08/intel-oneapi-base-toolkit-2025.1.0.651_offline.sh",
+            "98cad2489f2c90a2b328568a59371cf35855a3338643f61a9fc2d16a265d29f22feb2d673916dd7be18fa12a5e6d2475",
         )
         return
```
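
Replacing the separate oneDNN and oneMKL packages with the single oneAPI Base Toolkit 2025.1 installer leaves one artifact to download, verify, and execute. As a rough illustration only, here is a standalone sketch of what a helper like install_package might do: the install prefix is invented, the SHA-384 choice is inferred from the 96-hex-digit checksums above, and the silent-install flags follow Intel's documented offline-installer usage, not necessarily this repo's real implementation (which is a method on the helper class in utils/oneapi.py):

```python
import hashlib
import subprocess
from pathlib import Path
from shutil import copyfileobj
from urllib.request import urlopen  # nosec B404

ONEAPI_DIR = Path.home() / "intel-oneapi"  # assumed install prefix

def install_package(name: str, url: str, checksum: str) -> None:
    """Download, verify, and silently run an Intel offline installer (sketch)."""
    ONEAPI_DIR.mkdir(parents=True, exist_ok=True)
    installer = ONEAPI_DIR / f"{name}.sh"
    if not installer.exists():
        with urlopen(url) as in_stream, open(installer, "wb") as out_file:
            copyfileobj(in_stream, out_file)
    # The 96-hex-digit checksums above are consistent with SHA-384.
    sha = hashlib.sha384()
    with open(installer, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    if sha.hexdigest() != checksum:
        raise RuntimeError(f"checksum mismatch for {installer}")
    # Intel's offline installers document these flags: -a forwards arguments
    # to the bundled installer, -s runs it non-interactively.
    subprocess.run(
        ["sh", str(installer), "-a", "-s", "--eula", "accept",
         f"--install-dir={ONEAPI_DIR}"],
        check=True,
    )
```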

Diff for: devops/scripts/benchmarks/utils/utils.py (+5 −3)

```diff
@@ -9,11 +9,11 @@
 import subprocess
 
 import tarfile
-import urllib # nosec B404
 from options import options
 from pathlib import Path
 import hashlib
-
+from urllib.request import urlopen # nosec B404
+from shutil import copyfileobj
 
 def run(
     command,
@@ -147,7 +147,9 @@ def download(dir, url, file, untar=False, unzip=False, checksum=""):
     data_file = os.path.join(dir, file)
     if not Path(data_file).exists():
         print(f"{data_file} does not exist, downloading")
-        urllib.request.urlretrieve(url, data_file)
+        with urlopen(url) as in_stream, open(data_file, 'wb') as out_file:
+            copyfileobj(in_stream, out_file)
+
     calculated_checksum = calculate_checksum(data_file)
     if calculated_checksum != checksum:
         print(
```
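
urllib.request.urlretrieve is documented as a legacy interface that may become deprecated, which lines up with the commit message's note that the old code path failed on some configurations. The replacement streams the HTTP response straight to disk. Here is a self-contained sketch of the same pattern, where fetch is a hypothetical name rather than one of the script's helpers:

```python
from shutil import copyfileobj
from urllib.request import urlopen  # nosec B404

def fetch(url: str, dest: str, chunk_size: int = 1 << 20) -> None:
    """Stream `url` to `dest` in fixed-size chunks (hypothetical helper).

    Unlike urlretrieve, this keeps only one chunk in memory at a time,
    which matters for multi-gigabyte GGUF models.
    """
    with urlopen(url) as in_stream, open(dest, "wb") as out_file:
        copyfileobj(in_stream, out_file, chunk_size)
```

The diff above inlines this same pattern directly in download(), leaving the existing checksum verification untouched.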
