
Commit f063cca

refactored script
1 parent ec00748 commit f063cca

File tree

2 files changed: +116 -100 lines changed


.github/scripts/run_vllm_profiling.sh

Lines changed: 114 additions & 98 deletions
@@ -4,42 +4,45 @@ set -eux
 
 # Script to run vLLM profiling with configurable parameters via environment variables
 
-echo 'Running vLLM profiling with the following configuration:'
-echo "  Model: ${MODEL_NAME:-facebook/opt-125m}"
-echo "  Served Model: ${SERVED_MODEL_NAME:-${MODEL_NAME:-facebook/opt-125m}}"
-echo "  Dataset: ${DATASET_NAME:-random}"
-echo "  Input Length: ${RANDOM_INPUT_LEN:-750}"
-echo "  Output Length: ${RANDOM_OUTPUT_LEN:-75}"
-echo "  Endpoint: ${ENDPOINT:-/v1/completions}"
-echo "  Host: ${HOST:-localhost}"
-echo "  Port: ${PORT:-8000}"
-echo "  Num Prompts: ${NUM_PROMPTS:-100}"
-echo "  VLLM_USE_V1: ${VLLM_USE_V1:-1}"
-
-# Install required dependencies
-echo "Installing required dependencies..."
-(which curl) || (apt-get update && apt-get install -y curl)
-(which lsof) || (apt-get update && apt-get install -y lsof)
-
-# Ensure we're in the right directory (mounted workspace)
-cd /tmp/workspace/vllm
-
-# Create profiling results directory
-mkdir -p profiling-results
-
-# Set default values for any missing environment variables
-export VLLM_USE_V1=${VLLM_USE_V1:-1}
-MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
-SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
-DATASET_NAME=${DATASET_NAME:-random}
-RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
-RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
-ENDPOINT=${ENDPOINT:-/v1/completions}
-HOST=${HOST:-localhost}
-PORT=${PORT:-8000}
-NUM_PROMPTS=${NUM_PROMPTS:-100}
-
-# Helper functions
+# Global variables - set defaults for environment variables
+setup_environment() {
+    export VLLM_USE_V1=${VLLM_USE_V1:-1}
+    MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
+    SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
+    DATASET_NAME=${DATASET_NAME:-random}
+    RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
+    RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
+    ENDPOINT=${ENDPOINT:-/v1/completions}
+    HOST=${HOST:-localhost}
+    PORT=${PORT:-8000}
+    NUM_PROMPTS=${NUM_PROMPTS:-100}
+}
+
+print_configuration() {
+    echo 'Running vLLM profiling with the following configuration:'
+    echo "  Model: ${MODEL_NAME}"
+    echo "  Served Model: ${SERVED_MODEL_NAME}"
+    echo "  Dataset: ${DATASET_NAME}"
+    echo "  Input Length: ${RANDOM_INPUT_LEN}"
+    echo "  Output Length: ${RANDOM_OUTPUT_LEN}"
+    echo "  Endpoint: ${ENDPOINT}"
+    echo "  Host: ${HOST}"
+    echo "  Port: ${PORT}"
+    echo "  Num Prompts: ${NUM_PROMPTS}"
+    echo "  VLLM_USE_V1: ${VLLM_USE_V1}"
+}
+
+install_dependencies() {
+    echo "Installing required dependencies..."
+    (which curl) || (apt-get update && apt-get install -y curl)
+    (which lsof) || (apt-get update && apt-get install -y lsof)
+}
+
+setup_workspace() {
+    # Ensure we're in the right directory (mounted workspace)
+    cd /tmp/workspace/vllm
+}
 
 wait_for_server() {
     # Wait for vLLM server to start
     # Return 1 if vLLM server crashes
@@ -64,66 +67,79 @@ kill_gpu_processes() {
     fi
 }
 
-# Clean up any existing processes first
-kill_gpu_processes
-
-# Start vLLM server in the background
-echo "Starting vLLM server..."
-
-VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
-    --model "${MODEL_NAME}" \
-    --swap-space 16 \
-    --disable-log-requests \
-    --host :: \
-    --port "${PORT}" \
-    --dtype float16 &
-
-server_pid=$!
-echo "vLLM server started with PID: ${server_pid}"
-
-# Wait for server to be ready
-echo "Waiting for vLLM server to be ready..."
-if wait_for_server; then
-    echo "vLLM server is up and running!"
-else
-    echo "vLLM server failed to start within the timeout period."
+start_vllm_server() {
+    echo "Starting vLLM server..."
+
+    VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
+        --model "${MODEL_NAME}" \
+        --swap-space 16 \
+        --disable-log-requests \
+        --host :: \
+        --port "${PORT}" \
+        --dtype float16 &
+
+    server_pid=$!
+    echo "vLLM server started with PID: ${server_pid}"
+
+    # Wait for server to be ready
+    echo "Waiting for vLLM server to be ready..."
+    if wait_for_server; then
+        echo "vLLM server is up and running!"
+        return 0
+    else
+        echo "vLLM server failed to start within the timeout period."
+        kill -9 $server_pid 2>/dev/null || true
+        return 1
+    fi
+}
+
+run_profiling() {
+    echo "Starting load generation for profiling..."
+
+    local bench_command="vllm bench serve --dataset-name ${DATASET_NAME} --model ${MODEL_NAME} --served-model-name ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS} --profile"
+
+    echo "Load gen command: ${bench_command}"
+
+    vllm bench serve \
+        --dataset-name "${DATASET_NAME}" \
+        --model "${MODEL_NAME}" \
+        --served-model-name "${SERVED_MODEL_NAME}" \
+        --random-input-len "${RANDOM_INPUT_LEN}" \
+        --random-output-len "${RANDOM_OUTPUT_LEN}" \
+        --endpoint "${ENDPOINT}" \
+        --ignore-eos \
+        --host "${HOST}" \
+        --port "${PORT}" \
+        --num-prompts "${NUM_PROMPTS}" \
+        --profile
+}
+
+cleanup_server() {
+    echo "Stopping vLLM server..."
     kill -9 $server_pid 2>/dev/null || true
-    exit 1
-fi
-
-# Run the load generation/profiling command
-echo "Starting load generation for profiling..."
-echo "Load gen command: vllm bench serve --dataset-name ${DATASET_NAME} --model ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS}"
-
-vllm bench serve \
-    --dataset-name "${DATASET_NAME}" \
-    --model "${MODEL_NAME}" \
-    --served-model-name "${SERVED_MODEL_NAME}" \
-    --random-input-len "${RANDOM_INPUT_LEN}" \
-    --random-output-len "${RANDOM_OUTPUT_LEN}" \
-    --endpoint "${ENDPOINT}" \
-    --ignore-eos \
-    --host "${HOST}" \
-    --port "${PORT}" \
-    --num-prompts "${NUM_PROMPTS}" \
-    --profile
-
-# Clean up the server
-echo "Stopping vLLM server..."
-kill -9 $server_pid 2>/dev/null || true
-kill_gpu_processes
-
-# Copy any generated profiling results to the profiling-results directory
-if [ -d "${VLLM_TORCH_PROFILER_DIR:-}" ]; then
-    echo "Copying profiling results from ${VLLM_TORCH_PROFILER_DIR} to profiling-results/"
-    cp -r "${VLLM_TORCH_PROFILER_DIR}"/* profiling-results/ 2>/dev/null || echo "No profiling results found in ${VLLM_TORCH_PROFILER_DIR}"
-fi
-
-# Look for any .json or .trace files that might have been generated
-find . -name "*.json" -o -name "*.trace" -o -name "*.chrome_trace" | while read -r file; do
-    echo "Moving profiling artifact: ${file}"
-    cp "${file}" profiling-results/ 2>/dev/null || echo "Failed to copy ${file}"
-done
-
-echo "Profiling artifacts copied to profiling-results/"
-ls -la profiling-results/
+    kill_gpu_processes
+}
+
+main() {
+    # Setup phase
+    setup_environment
+    print_configuration
+    install_dependencies
+    setup_workspace
+
+    # Clean up any existing processes first
+    kill_gpu_processes
+
+    # Main execution phase
+    if start_vllm_server; then
+        run_profiling
+        cleanup_server
+        echo "Profiling completed. Artifacts will be available in ${VLLM_TORCH_PROFILER_DIR:-default profiler directory}."
+    else
+        echo "Failed to start vLLM server. Exiting."
+        exit 1
+    fi
+}
+
+# Run the main function
+main "$@"
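
For reference, a minimal sketch of how the refactored script can be invoked, assuming the repository is mounted at /tmp/workspace/vllm as setup_workspace() expects (the override values below are illustrative, not part of this commit):

    # Variables read by setup_environment() can be overridden from the
    # caller's environment; anything left unset falls back to the
    # ${VAR:-default} values inside the script.
    export MODEL_NAME=facebook/opt-125m    # the script's default model
    export NUM_PROMPTS=200                 # hypothetical override
    export RANDOM_INPUT_LEN=1024           # hypothetical override
    bash .github/scripts/run_vllm_profiling.sh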

.github/workflows/vllm-profiling.yml

Lines changed: 2 additions & 2 deletions
@@ -190,7 +190,7 @@ jobs:
       DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
       # vLLM-related environment variables
       VLLM_USE_MODELSCOPE: false
-      VLLM_TORCH_PROFILER_DIR: ~/tmp/workspace/vllm_profile
+      VLLM_TORCH_PROFILER_DIR: ~/tmp/workspace/vllm/vllm_profile
       CUDA_VISIBLE_DEVICES: 0
       VLLM_USE_V1: 1
       # Profiling parameters
@@ -246,4 +246,4 @@ jobs:
       - uses: actions/upload-artifact@v4
         with:
           name: profiling-results--${{ env.DEVICE_TYPE }}
-          path: vllm/profiling-results
+          path: vllm/vllm_profile
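
The two workflow changes appear to keep the profiler output and the uploaded artifact in step: VLLM_TORCH_PROFILER_DIR now points inside the vllm checkout, and the upload step reads vllm/vllm_profile directly rather than the profiling-results/ staging directory the old script populated. A quick local sanity check under the same assumed layout (a sketch, not part of the commit):

    # From the workspace root, confirm the torch profiler traces landed
    # where the upload-artifact step's path expects them.
    ls -la vllm/vllm_profile || echo "No profiler traces found"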
