Commit deddaff
refactored code and split common functions
1 parent 7e82e0e commit deddaff

File tree

.github/scripts/common_functions.sh (new)
.github/scripts/run-sglang-performance-benchmarks.sh
.github/scripts/run_vllm_profiling.sh
vllm-profiling/cuda/profiling-tests.json

4 files changed: +145 -165 lines changed

.github/scripts/common_functions.sh

Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+# Common functions shared between performance benchmarking scripts
+# This file contains utility functions used by both the SGLang and vLLM scripts
+
+json2args() {
+    # transforms a JSON string into command-line args; '_' in keys is replaced with '-'
+    # example:
+    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
+    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
+    local json_string=$1
+    local args=$(
+        echo "$json_string" | jq -r '
+            to_entries |
+            map(
+                if .value == "" then "--" + (.key | gsub("_"; "-"))
+                else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
+                end
+            ) |
+            join(" ")
+        '
+    )
+    echo "$args"
+}
+
+json2envs() {
+    # transforms a JSON string into environment variable assignments
+    # example:
+    # input: { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
+    # output: SGLANG_DISABLE_CUDA_GRAPH=1
+    local json_string=$1
+    local args=$(
+        echo "$json_string" | jq -r '
+            to_entries |
+            map((.key) + "=" + (.value | tostring)) |
+            join(" ")
+        '
+    )
+    echo "$args"
+}
+
+wait_for_server() {
+    # wait for the server to start
+    # $1: endpoint URL (e.g., localhost:30000/v1/completions or localhost:8000/v1/models)
+    # $2: timeout in seconds (default: 1200)
+    # returns 1 if the server crashes or the timeout expires
+    local endpoint="${1:-localhost:8000/v1/models}"
+    local timeout="${2:-1200}"
+
+    timeout "$timeout" bash -c "
+        until curl -s $endpoint > /dev/null; do
+            sleep 1
+        done" && return 0 || return 1
+}
+
+kill_gpu_processes() {
+    # Kill GPU processes and wait for GPU memory to clear
+    # $1: port number to kill processes on (default: 8000)
+    local port="${1:-8000}"
+
+    ps -aux
+    lsof -t -i:"$port" | xargs -r kill -9
+    pgrep python3 | xargs -r kill -9
+    pgrep python | xargs -r kill -9
+    pgrep VLLM | xargs -r kill -9
+
+    # wait until GPU memory usage is smaller than 1GB
+    if command -v nvidia-smi; then
+        echo "Waiting for GPU memory to clear..."
+        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
+            sleep 1
+        done
+    elif command -v amd-smi; then
+        while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
+            sleep 1
+        done
+    fi
+}
+
+install_dependencies() {
+    echo "Installing required dependencies..."
+    (which curl) || (apt-get update && apt-get install -y curl)
+    (which lsof) || (apt-get update && apt-get install -y lsof)
+    (which jq) || (apt-get update && apt-get install -y jq)
+    (which wget) || (apt-get update && apt-get install -y wget)
+}
+
+kill_processes_launched_by_current_bash() {
+    # Kill all processes matching a pattern that were launched from the current bash script
+    # $1: process pattern to match
+    current_shell_pid=$$
+    processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
+    if [ -n "$processes" ]; then
+        echo "Killing the following processes matching '$1':"
+        echo "$processes"
+        echo "$processes" | xargs kill -9
+    else
+        echo "No processes found matching '$1'."
+    fi
+}
+
+check_gpus() {
+    # check the number of GPUs and the GPU type
+    if command -v nvidia-smi; then
+        declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
+    elif command -v amd-smi; then
+        declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
+    fi
+
+    if [[ $gpu_count -gt 0 ]]; then
+        echo "GPU found."
+    else
+        echo "Need at least 1 GPU to run benchmarking."
+        exit 1
+    fi
+    if command -v nvidia-smi; then
+        declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
+    elif command -v amd-smi; then
+        declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+    fi
+    echo "GPU type is $gpu_type"
+}
+
+check_hf_token() {
+    # check that HF_TOKEN is set and looks valid
+    if [[ -z "$HF_TOKEN" ]]; then
+        echo "Error: HF_TOKEN is not set."
+        exit 1
+    elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
+        echo "Error: HF_TOKEN does not start with 'hf_'."
+        exit 1
+    else
+        echo "HF_TOKEN is set and valid."
+    fi
+}
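
A minimal usage sketch of these helpers (illustrative only — the parameter JSON and model name are assumptions, not taken from this commit):

#!/bin/bash
# Source the shared helpers relative to the calling script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/common_functions.sh"

check_gpus            # sets gpu_count/gpu_type, exits if no GPU is found
install_dependencies  # installs curl, lsof, jq, wget if missing

# Turn JSON parameter blocks into CLI flags and env assignments
server_args=$(json2args '{ "model": "facebook/opt-125m", "tensor_parallel_size": 1 }')
server_envs=$(json2envs '{ "VLLM_USE_V1": 1 }')
echo "$server_args"   # --model facebook/opt-125m --tensor-parallel-size 1
echo "$server_envs"   # VLLM_USE_V1=1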

.github/scripts/run-sglang-performance-benchmarks.sh

Lines changed: 7 additions & 113 deletions
@@ -9,31 +9,13 @@
 set -x
 set -o pipefail

+# Source common functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/common_functions.sh"
+
 # The helper functions and their implementations are referred from the implementation
 # of the run-performance-benchmarks.sh script in the official vllm repo
 # Path:- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
-check_gpus() {
-    if command -v nvidia-smi; then
-        # check the number of GPUs and GPU type.
-        declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
-    elif command -v amd-smi; then
-        declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
-    fi
-
-    if [[ $gpu_count -gt 0 ]]; then
-        echo "GPU found."
-    else
-        echo "Need at least 1 GPU to run benchmarking."
-        exit 1
-    fi
-    if command -v nvidia-smi; then
-        declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
-    elif command -v amd-smi; then
-        declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
-    fi
-    echo "GPU type is $gpu_type"
-}
-
 check_cpus() {
     # check the number of CPUs and NUMA Node and GPU type.
     declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}')
@@ -48,18 +30,6 @@ check_cpus() {
     echo "GPU type is $gpu_type"
 }

-check_hf_token() {
-    # check if HF_TOKEN is available and valid
-    if [[ -z "$HF_TOKEN" ]]; then
-        echo "Error: HF_TOKEN is not set."
-        exit 1
-    elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
-        echo "Error: HF_TOKEN does not start with 'hf_'."
-        exit 1
-    else
-        echo "HF_TOKEN is set and valid."
-    fi
-}

 ensure_sharegpt_downloaded() {
     local FILE=ShareGPT_V3_unfiltered_cleaned_split.json
@@ -70,78 +40,6 @@ ensure_sharegpt_downloaded() {
     fi
 }

-json2args() {
-    # transforms the JSON string to command line args, and '_' is replaced to '-'
-    # example:
-    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
-    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map("--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
-
-json2envs() {
-    # transforms the JSON string to environment variables.
-    # example:
-    # input: { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
-    # output: SGLANG_DISABLE_CUDA_GRAPH=1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map((.key ) + "=" + (.value | tostring)) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
-
-wait_for_server() {
-    # wait for sglang server to start
-    # return 1 if sglang server crashes
-    timeout 1200 bash -c '
-        until curl -s localhost:30000/v1/completions > /dev/null; do
-            sleep 1
-        done' && return 0 || return 1
-}
-
-kill_processes_launched_by_current_bash() {
-    # Kill all python processes launched from current bash script
-    current_shell_pid=$$
-    processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
-    if [ -n "$processes" ]; then
-        echo "Killing the following processes matching '$1':"
-        echo "$processes"
-        echo "$processes" | xargs kill -9
-    else
-        echo "No processes found matching '$1'."
-    fi
-}
-
-kill_gpu_processes() {
-    ps -aux
-    lsof -t -i:30000 | xargs -r kill -9
-    pgrep python3 | xargs -r kill -9
-    pgrep python | xargs -r kill -9
-    pgrep VLLM | xargs -r kill -9
-
-    # wait until GPU memory usage smaller than 1GB
-    if command -v nvidia-smi; then
-        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
-            sleep 1
-        done
-    elif command -v amd-smi; then
-        while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
-            sleep 1
-        done
-    fi
-}

 run_serving_tests() {
     # run serving tests using `sglang.bench_serving` command
@@ -211,7 +109,7 @@ run_serving_tests() {
         server_pid=$!

         # wait until the server is alive
-        if wait_for_server; then
+        if wait_for_server "localhost:30000/v1/completions"; then
             echo ""
             echo "SGLang server is up and running."
         else
@@ -285,18 +183,14 @@ run_serving_tests() {

         # clean up
         kill -9 $server_pid
-        kill_gpu_processes
+        kill_gpu_processes 30000
     done
 }

 main() {
     check_gpus
     check_hf_token
-
-    # dependencies
-    (which wget && which curl) || (apt-get update && apt-get install -y wget curl)
-    (which jq) || (apt-get update && apt-get -y install jq)
-    (which lsof) || (apt-get update && apt-get install -y lsof)
+    install_dependencies

     # get the current IP address, required by SGLang bench commands
     export SGLANG_HOST_IP=$(hostname -I | awk '{print $1}')
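
With the shared helpers now parameterized, this script passes its SGLang endpoint and port explicitly instead of relying on hard-coded values. A sketch of the resulting call pattern (the launch command and model are illustrative, not part of this diff):

python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 &
server_pid=$!

# poll the completions endpoint for up to 1200s before benchmarking
if wait_for_server "localhost:30000/v1/completions" 1200; then
    echo "SGLang server is up and running."
fi

# ... run sglang.bench_serving against the server ...

kill -9 $server_pid
kill_gpu_processes 30000   # frees port 30000, then waits for GPU memory to drop below ~1GB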

.github/scripts/run_vllm_profiling.sh

Lines changed: 3 additions & 51 deletions
@@ -1,39 +1,16 @@
 #!/bin/bash
 set -eux

-json2args() {
-    # transforms the JSON string to command line args, and '_' is replaced to '-'
-    # example:
-    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
-    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map(
-                if .value == "" then "--" + (.key | gsub("_"; "-"))
-                else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
-                end
-            ) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
+# Source common functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/common_functions.sh"

 print_configuration() {
     echo 'Running vLLM profiling with the following configuration:'
     echo "  Profiler Dir: ${VLLM_TORCH_PROFILER_DIR:-not set}"
     echo "  VLLM_USE_V1: ${VLLM_USE_V1:-1}"
 }

-install_dependencies() {
-    echo "Installing required dependencies..."
-    (which curl) || (apt-get update && apt-get install -y curl)
-    (which lsof) || (apt-get update && apt-get install -y lsof)
-    (which jq) || (apt-get update && apt-get -y install jq)
-}
-
 setup_workspace() {
     WORKSPACE_DIR="/tmp/workspace"
     cd "${WORKSPACE_DIR}"
@@ -43,31 +20,6 @@ setup_workspace() {
     chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
 }

-wait_for_server() {
-    # Wait for vLLM server to start
-    # Return 1 if vLLM server crashes
-    local host_port="${1:-localhost:8000}"
-    timeout 1200 bash -c "
-        until curl -s ${host_port}/v1/models > /dev/null; do
-            sleep 1
-        done" && return 0 || return 1
-}
-
-kill_gpu_processes() {
-    ps -aux
-    lsof -t -i:8000 | xargs -r kill -9
-    pgrep python3 | xargs -r kill -9
-    pgrep VLLM | xargs -r kill -9
-
-    # Wait until GPU memory usage decreases
-    if command -v nvidia-smi; then
-        echo "Waiting for GPU memory to clear..."
-        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
-            sleep 1
-        done
-    fi
-}
-
 start_vllm_server() {
     local server_args="$1"
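
Since the shared wait_for_server defaults to localhost:8000/v1/models and kill_gpu_processes defaults to port 8000, the profiling script can call both without arguments. A sketch under those defaults (the serve command is illustrative, not part of this diff):

vllm serve facebook/opt-125m --port 8000 &
server_pid=$!

wait_for_server          # defaults: localhost:8000/v1/models, 1200s timeout
# ... profiling workload runs here ...
kill -9 $server_pid
kill_gpu_processes       # defaults to port 8000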

vllm-profiling/cuda/profiling-tests.json

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@
     },
     "client_parameters": {
         "model": "facebook/opt-125m",
-        "served_model_name": "facebook/opt-125m",
         "dataset_name": "random",
         "random_input_len": 750,
         "random_output_len": 75,

0 commit comments