
Commit 3fc9fb8

running generic tests
1 parent e88fa3b commit 3fc9fb8

File tree

3 files changed: +190 −144 lines
Lines changed: 163 additions & 123 deletions
@@ -1,157 +1,197 @@
 #!/bin/bash
-
 set -eux
 
-# Script to run vLLM profiling with configurable parameters via environment variables
-
-# Global variables - set defaults for environment variables
-setup_environment() {
-  export VLLM_USE_V1=${VLLM_USE_V1:-1}
-  MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
-  SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
-  DATASET_NAME=${DATASET_NAME:-random}
-  RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
-  RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
-  ENDPOINT=${ENDPOINT:-/v1/completions}
-  HOST=${HOST:-localhost}
-  PORT=${PORT:-8000}
-  NUM_PROMPTS=${NUM_PROMPTS:-100}
+json2args() {
+  # transforms the JSON string to command line args, and '_' is replaced to '-'
+  # example:
+  #   input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
+  #   output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
+  local json_string=$1
+  local args=$(
+    echo "$json_string" | jq -r '
+      to_entries |
+      map(
+        if .value == "" then "--" + (.key | gsub("_"; "-"))
+        else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
+        end
+      ) |
+      join(" ")
+    '
+  )
+  echo "$args"
 }
 
 print_configuration() {
-  echo 'Running vLLM profiling with the following configuration:'
-  echo " Model: ${MODEL_NAME}"
-  echo " Served Model: ${SERVED_MODEL_NAME}"
-  echo " Dataset: ${DATASET_NAME}"
-  echo " Input Length: ${RANDOM_INPUT_LEN}"
-  echo " Output Length: ${RANDOM_OUTPUT_LEN}"
-  echo " Endpoint: ${ENDPOINT}"
-  echo " Host: ${HOST}"
-  echo " Port: ${PORT}"
-  echo " Num Prompts: ${NUM_PROMPTS}"
-  echo " VLLM_USE_V1: ${VLLM_USE_V1}"
+  echo 'Running vLLM profiling with the following configuration:'
+  echo " Profiler Dir: ${VLLM_TORCH_PROFILER_DIR:-not set}"
+  echo " VLLM_USE_V1: ${VLLM_USE_V1:-1}"
 }
 
 install_dependencies() {
-  echo "Installing required dependencies..."
-  (which curl) || (apt-get update && apt-get install -y curl)
-  (which lsof) || (apt-get update && apt-get install -y lsof)
+  echo "Installing required dependencies..."
+  (which curl) || (apt-get update && apt-get install -y curl)
+  (which lsof) || (apt-get update && apt-get install -y lsof)
+  (which jq) || (apt-get update && apt-get -y install jq)
 }
 
 setup_workspace() {
-  # Ensure we're in the workspace directory, but don't go into vllm source
-  # The Docker container has vLLM pre-installed, we shouldn't run from source
-  cd /tmp/workspace
-
-  # Create the profiling directory (no need for tilde expansion now)
-  echo "Creating profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
-  mkdir -p "${VLLM_TORCH_PROFILER_DIR}"
+  # Ensure we're in the workspace directory, but don't go into vllm source
+  cd /tmp/workspace
 
-  # Ensure the directory is writable
-  chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
+  # Create the profiling directory
+  echo "Creating profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
+  mkdir -p "${VLLM_TORCH_PROFILER_DIR}"
+  chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
 }
 
 wait_for_server() {
-  # Wait for vLLM server to start
-  # Return 1 if vLLM server crashes
-  timeout 1200 bash -c "
-    until curl -s ${HOST}:${PORT}/v1/models > /dev/null; do
-      sleep 1
-    done" && return 0 || return 1
+  # Wait for vLLM server to start
+  # Return 1 if vLLM server crashes
+  local host_port="${1:-localhost:8000}"
+  timeout 1200 bash -c "
+    until curl -s ${host_port}/v1/models > /dev/null; do
+      sleep 1
+    done" && return 0 || return 1
 }
 
 kill_gpu_processes() {
-  ps -aux
-  lsof -t -i:8000 | xargs -r kill -9
-  pgrep python3 | xargs -r kill -9
-  pgrep VLLM | xargs -r kill -9
-
-  # Wait until GPU memory usage decreases
-  if command -v nvidia-smi; then
-    echo "Waiting for GPU memory to clear..."
-    while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
-      sleep 1
-    done
-  fi
+  ps -aux
+  lsof -t -i:8000 | xargs -r kill -9
+  pgrep python3 | xargs -r kill -9
+  pgrep VLLM | xargs -r kill -9
+
+  # Wait until GPU memory usage decreases
+  if command -v nvidia-smi; then
+    echo "Waiting for GPU memory to clear..."
+    while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
+      sleep 1
+    done
+  fi
 }
 
 start_vllm_server() {
-  echo "Starting vLLM server..."
-
-  VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
-    --model "${MODEL_NAME}" \
-    --swap-space 16 \
-    --disable-log-requests \
-    --host :: \
-    --port "${PORT}" \
-    --dtype float16 &
-
-  server_pid=$!
-  echo "vLLM server started with PID: ${server_pid}"
-
-  # Wait for server to be ready
-  echo "Waiting for vLLM server to be ready..."
-  if wait_for_server; then
-    echo "vLLM server is up and running!"
-    return 0
-  else
-    echo "vLLM server failed to start within the timeout period."
-    kill -9 $server_pid 2>/dev/null || true
-    return 1
-  fi
+  local server_args="$1"
+
+  echo "Starting vLLM server..."
+  VLLM_USE_V1=${VLLM_USE_V1:-1} python3 -m vllm.entrypoints.openai.api_server ${server_args} &
+
+  server_pid=$!
+  echo "vLLM server started with PID: ${server_pid}"
+
+  # Wait for server to be ready
+  echo "Waiting for vLLM server to be ready..."
+  if wait_for_server "${SERVER_HOST}:${SERVER_PORT}"; then
+    echo "vLLM server is up and running!"
+    return 0
+  else
+    echo "vLLM server failed to start within the timeout period."
+    kill -9 $server_pid 2>/dev/null || true
+    return 1
+  fi
 }
 
 run_profiling() {
-  echo "Starting load generation for profiling..."
-
-  local bench_command="vllm bench serve --dataset-name ${DATASET_NAME} --model ${MODEL_NAME} --served-model-name ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS} --profile"
-
-  echo "Load gen command: ${bench_command}"
-
-  vllm bench serve \
-    --dataset-name "${DATASET_NAME}" \
-    --model "${MODEL_NAME}" \
-    --served-model-name "${SERVED_MODEL_NAME}" \
-    --random-input-len "${RANDOM_INPUT_LEN}" \
-    --random-output-len "${RANDOM_OUTPUT_LEN}" \
-    --endpoint "${ENDPOINT}" \
-    --ignore-eos \
-    --host "${HOST}" \
-    --port "${PORT}" \
-    --num-prompts "${NUM_PROMPTS}" \
-    --profile
+  local client_args="$1"
+
+  echo "Starting load generation for profiling..."
+  echo "Client command: vllm bench serve ${client_args}"
+
+  vllm bench serve ${client_args}
 }
 
 cleanup_server() {
-  echo "Stopping vLLM server..."
-  kill -9 $server_pid 2>/dev/null || true
-  kill_gpu_processes
+  echo "Stopping vLLM server..."
+  kill -9 $server_pid 2>/dev/null || true
+  kill_gpu_processes
+}
+
+run_profiling_tests() {
+  # run profiling tests using JSON configuration
+  local profiling_test_file="$1"
+
+  if [[ ! -f "$profiling_test_file" ]]; then
+    echo "Error: Profiling test file $profiling_test_file not found!"
+    exit 1
+  fi
+
+  # Iterate over profiling tests
+  jq -c '.[]' "$profiling_test_file" | while read -r params; do
+    # Get the test name
+    TEST_NAME=$(echo "$params" | jq -r '.test_name')
+    echo "Running profiling test case: $TEST_NAME"
+
+    # Extract server and client parameters
+    server_params=$(echo "$params" | jq -r '.server_parameters')
+    client_params=$(echo "$params" | jq -r '.client_parameters')
+
+    # Convert JSON to command line arguments
+    server_args=$(json2args "$server_params")
+    client_args=$(json2args "$client_params")
+
+    # Extract host and port for server health check
+    SERVER_HOST=$(echo "$server_params" | jq -r '.host // "::"')
+    SERVER_PORT=$(echo "$server_params" | jq -r '.port // 8000')
+
+    # Convert :: to localhost for health check
+    if [[ "$SERVER_HOST" == "::" ]]; then
+      SERVER_HOST="localhost"
+    fi
+
+    # Clean up any existing processes first
+    kill_gpu_processes
+
+    # Run the profiling test
+    if start_vllm_server "$server_args"; then
+      run_profiling "$client_args"
+      cleanup_server
+
+      # Debug: Check if profiling files were created
+      echo "DEBUG: Checking profiling directory: ${VLLM_TORCH_PROFILER_DIR}"
+      if [ -d "${VLLM_TORCH_PROFILER_DIR}" ]; then
+        echo "DEBUG: Profiling directory exists for test $TEST_NAME"
+        ls -la "${VLLM_TORCH_PROFILER_DIR}" || echo "DEBUG: Directory is empty or inaccessible"
+        find "${VLLM_TORCH_PROFILER_DIR}" -type f 2>/dev/null | head -10 | while read file; do
+          echo "DEBUG: Found profiling file: ${file}"
+        done
+      else
+        echo "DEBUG: Profiling directory does not exist for test $TEST_NAME!"
+      fi
+
+      echo "Profiling test $TEST_NAME completed successfully."
+    else
+      echo "Failed to start vLLM server for test $TEST_NAME."
+      continue
+    fi
+  done
 }
 
 main() {
-  # Setup phase
-  setup_environment
-  print_configuration
-  install_dependencies
-  setup_workspace
-
-  # Debug: Show environment variables
-  echo "DEBUG: VLLM_TORCH_PROFILER_DIR=${VLLM_TORCH_PROFILER_DIR:-not set}"
-
-  # Clean up any existing processes first
-  kill_gpu_processes
-
-  # Main execution phase
-  if start_vllm_server; then
-    run_profiling
-    cleanup_server
-
-    echo "Profiling completed. Artifacts should be available in ${VLLM_TORCH_PROFILER_DIR:-default profiler directory}."
-  else
-    echo "Failed to start vLLM server. Exiting."
-    exit 1
-  fi
+  # Set default values
+  export VLLM_USE_V1=${VLLM_USE_V1:-1}
+
+  # Setup phase
+  print_configuration
+  install_dependencies
+  setup_workspace
+
+  # Determine the profiling test file based on device type
+  local device_name="${DEVICE_NAME:-cuda}"
+  local profiling_test_file="/tmp/workspace/vllm-profiling/${device_name}/profiling-tests.json"
+
+  echo "Looking for profiling test file: $profiling_test_file"
+
+  if [[ -f "$profiling_test_file" ]]; then
+    echo "Found profiling test file: $profiling_test_file"
+    run_profiling_tests "$profiling_test_file"
+  else
+    echo "Error: No profiling test file found at $profiling_test_file"
+    echo "Available files in vllm-profiling/:"
+    find /tmp/workspace/vllm-profiling/ -name "*.json" 2>/dev/null || echo "No JSON files found"
+    exit 1
+  fi
+
+  echo "All profiling tests completed. Artifacts should be available in ${VLLM_TORCH_PROFILER_DIR:-default profiler directory}."
 }
 
-# Run the main function
 main "$@"
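
For illustration only (not part of the commit): the jq transform in json2args can be exercised standalone. Assuming jq is installed, a small parameter object like the ones in the new test config expands to plain CLI flags; the sample JSON below is hypothetical.

  # Standalone check of the json2args transform (assumes jq; sample input is illustrative)
  params='{"model": "facebook/opt-125m", "swap_space": 16, "disable_log_requests": ""}'
  echo "$params" | jq -r '
    to_entries |
    map(
      if .value == "" then "--" + (.key | gsub("_"; "-"))
      else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
      end
    ) |
    join(" ")
  '
  # Expected output: --model facebook/opt-125m --swap-space 16 --disable-log-requests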

.github/workflows/vllm-profiling.yml

Lines changed: 1 addition & 21 deletions
@@ -15,12 +15,6 @@ on:
         description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
         required: false
         type: string
-      models:
-        description: |
-          A comma-separated list of models (optional, default to run everything)
-        required: false
-        type: string
-        default: 'facebook/opt-125m'
   pull_request:
     paths:
       - .github/workflows/vllm-profiling.yml
@@ -193,14 +187,6 @@ jobs:
           VLLM_TORCH_PROFILER_DIR: /tmp/workspace/vllm/vllm_profile
           CUDA_VISIBLE_DEVICES: 0
           VLLM_USE_V1: 1
-          # Profiling parameters
-          MODEL_NAME: ${{ inputs.models || 'facebook/opt-125m' }}
-          SERVED_MODEL_NAME: ${{ inputs.models || 'facebook/opt-125m' }}
-          RANDOM_INPUT_LEN: 750
-          RANDOM_OUTPUT_LEN: 75
-          PORT: 8000
-          NUM_PROMPTS: 100
-          DATASET_NAME: random
 
         run: |
           set -eux
@@ -223,13 +209,7 @@
             -e VLLM_TORCH_PROFILER_DIR \
             -e CUDA_VISIBLE_DEVICES \
             -e VLLM_USE_V1 \
-            -e MODEL_NAME \
-            -e SERVED_MODEL_NAME \
-            -e RANDOM_INPUT_LEN \
-            -e RANDOM_OUTPUT_LEN \
-            -e PORT \
-            -e NUM_PROMPTS \
-            -e DATASET_NAME \
+            -e DEVICE_NAME \
            -e ON_CPU="${ON_CPU}" \
            --ipc=host \
            --tty \
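
For context (not part of the diff): the new -e DEVICE_NAME flag forwards the device selector into the container, and the script resolves the per-device test file from it. A minimal sketch of that lookup, using the defaults from main():

  # Sketch: how DEVICE_NAME selects the JSON test file inside the container
  device_name="${DEVICE_NAME:-cuda}"
  profiling_test_file="/tmp/workspace/vllm-profiling/${device_name}/profiling-tests.json"
  echo "$profiling_test_file"  # e.g. /tmp/workspace/vllm-profiling/cuda/profiling-tests.json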
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+[
+  {
+    "test_name": "profiling_opt_125m_tp1_random",
+    "server_parameters": {
+      "model": "facebook/opt-125m",
+      "swap_space": 16,
+      "disable_log_requests": "",
+      "host": "::",
+      "port": 8000,
+      "dtype": "float16"
+    },
+    "client_parameters": {
+      "model": "facebook/opt-125m",
+      "served_model_name": "facebook/opt-125m",
+      "dataset_name": "random",
+      "random_input_len": 750,
+      "random_output_len": 75,
+      "endpoint": "/v1/completions",
+      "host": "localhost",
+      "port": 8000,
+      "num_prompts": 100,
+      "ignore_eos": true,
+      "profile": true
+    }
+  }
+]
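
For reference, a hedged sketch of the commands the script would assemble for this test case, derived by applying the json2args transform to the JSON above (reconstructed, not captured from a real run; note the boolean keys expand literally to "--ignore-eos true --profile true"):

  # Server command built by start_vllm_server (launched in the background):
  python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m --swap-space 16 --disable-log-requests --host :: --port 8000 --dtype float16 &
  # Client command built by run_profiling:
  vllm bench serve --model facebook/opt-125m --served-model-name facebook/opt-125m --dataset-name random --random-input-len 750 --random-output-len 75 --endpoint /v1/completions --host localhost --port 8000 --num-prompts 100 --ignore-eos true --profile true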
