@@ -4,42 +4,45 @@ set -eux
 
 # Script to run vLLM profiling with configurable parameters via environment variables
 
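+# Example invocation (script name is illustrative; every variable below is
+# optional and falls back to the defaults set in setup_environment):
+#   MODEL_NAME=facebook/opt-125m NUM_PROMPTS=200 bash run_vllm_profiling.sh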
-echo 'Running vLLM profiling with the following configuration:'
-echo "Model: ${MODEL_NAME:-facebook/opt-125m}"
-echo "Served Model: ${SERVED_MODEL_NAME:-${MODEL_NAME:-facebook/opt-125m}}"
-echo "Dataset: ${DATASET_NAME:-random}"
-echo "Input Length: ${RANDOM_INPUT_LEN:-750}"
-echo "Output Length: ${RANDOM_OUTPUT_LEN:-75}"
-echo "Endpoint: ${ENDPOINT:-/v1/completions}"
-echo "Host: ${HOST:-localhost}"
-echo "Port: ${PORT:-8000}"
-echo "Num Prompts: ${NUM_PROMPTS:-100}"
-echo "VLLM_USE_V1: ${VLLM_USE_V1:-1}"
-
-# Install required dependencies
-echo "Installing required dependencies..."
-(which curl) || (apt-get update && apt-get install -y curl)
-(which lsof) || (apt-get update && apt-get install -y lsof)
-
-# Ensure we're in the right directory (mounted workspace)
-cd /tmp/workspace/vllm
-
-# Create profiling results directory
-mkdir -p profiling-results
-
-# Set default values for any missing environment variables
-export VLLM_USE_V1=${VLLM_USE_V1:-1}
-MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
-SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
-DATASET_NAME=${DATASET_NAME:-random}
-RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
-RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
-ENDPOINT=${ENDPOINT:-/v1/completions}
-HOST=${HOST:-localhost}
-PORT=${PORT:-8000}
-NUM_PROMPTS=${NUM_PROMPTS:-100}
-
-# Helper functions
42- # Helper functions
+# Global variables - set defaults for environment variables
+setup_environment() {
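+    # ${VAR:-default} keeps any value the caller already exported and only
+    # falls back to the default when the variable is unset or empty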
+    export VLLM_USE_V1=${VLLM_USE_V1:-1}
+    MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
+    SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
+    DATASET_NAME=${DATASET_NAME:-random}
+    RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
+    RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
+    ENDPOINT=${ENDPOINT:-/v1/completions}
+    HOST=${HOST:-localhost}
+    PORT=${PORT:-8000}
+    NUM_PROMPTS=${NUM_PROMPTS:-100}
+}
+
+print_configuration() {
+    echo 'Running vLLM profiling with the following configuration:'
+    echo "Model: ${MODEL_NAME}"
+    echo "Served Model: ${SERVED_MODEL_NAME}"
+    echo "Dataset: ${DATASET_NAME}"
+    echo "Input Length: ${RANDOM_INPUT_LEN}"
+    echo "Output Length: ${RANDOM_OUTPUT_LEN}"
+    echo "Endpoint: ${ENDPOINT}"
+    echo "Host: ${HOST}"
+    echo "Port: ${PORT}"
+    echo "Num Prompts: ${NUM_PROMPTS}"
+    echo "VLLM_USE_V1: ${VLLM_USE_V1}"
+}
+
+install_dependencies() {
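+    # Assumes a Debian/Ubuntu-based image: 'which' probes for each tool and
+    # apt-get installs it only when it is missing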
36+ echo " Installing required dependencies..."
37+ (which curl) || (apt-get update && apt-get install -y curl)
38+ (which lsof) || (apt-get update && apt-get install -y lsof)
39+ }
40+
+setup_workspace() {
+    # Ensure we're in the right directory (mounted workspace)
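+    # With 'set -eux' in effect at the top of the script, a missing mount
+    # makes this cd fail and aborts the run immediately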
+    cd /tmp/workspace/vllm
+}
+
 wait_for_server() {
     # Wait for vLLM server to start
     # Return 1 if vLLM server crashes
@@ -64,66 +67,79 @@ kill_gpu_processes() {
     fi
 }
 
-# Clean up any existing processes first
-kill_gpu_processes
-
-# Start vLLM server in the background
-echo "Starting vLLM server..."
-
-VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
-    --model "${MODEL_NAME}" \
-    --swap-space 16 \
-    --disable-log-requests \
-    --host :: \
-    --port "${PORT}" \
-    --dtype float16 &
-
-server_pid=$!
-echo "vLLM server started with PID: ${server_pid}"
-
-# Wait for server to be ready
-echo "Waiting for vLLM server to be ready..."
-if wait_for_server; then
-    echo "vLLM server is up and running!"
-else
-    echo "vLLM server failed to start within the timeout period."
+start_vllm_server() {
+    echo "Starting vLLM server..."
+
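+    # '--host ::' binds the IPv6 wildcard address, which on typical dual-stack
+    # Linux hosts also accepts IPv4 connections; the server runs in the
+    # background so the script can poll it for readiness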
+    VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
+        --model "${MODEL_NAME}" \
+        --swap-space 16 \
+        --disable-log-requests \
+        --host :: \
+        --port "${PORT}" \
+        --dtype float16 &
+
+    server_pid=$!
+    echo "vLLM server started with PID: ${server_pid}"
+
+    # Wait for server to be ready
+    echo "Waiting for vLLM server to be ready..."
+    if wait_for_server; then
+        echo "vLLM server is up and running!"
+        return 0
+    else
+        echo "vLLM server failed to start within the timeout period."
+        kill -9 $server_pid 2>/dev/null || true
+        return 1
+    fi
+}
+
+run_profiling() {
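+    # '--profile' makes the benchmark ask the server to capture a torch
+    # profiler trace; this relies on the server having been launched with
+    # VLLM_TORCH_PROFILER_DIR set, otherwise no trace can be written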
97+ echo " Starting load generation for profiling..."
98+
99+ local bench_command=" vllm bench serve --dataset-name ${DATASET_NAME} --model ${MODEL_NAME} --served-model-name ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS} --profile"
100+
101+ echo " Load gen command: ${bench_command} "
102+
103+ vllm bench serve \
104+ --dataset-name " ${DATASET_NAME} " \
105+ --model " ${MODEL_NAME} " \
106+ --served-model-name " ${SERVED_MODEL_NAME} " \
107+ --random-input-len " ${RANDOM_INPUT_LEN} " \
108+ --random-output-len " ${RANDOM_OUTPUT_LEN} " \
109+ --endpoint " ${ENDPOINT} " \
110+ --ignore-eos \
111+ --host " ${HOST} " \
112+ --port " ${PORT} " \
113+ --num-prompts " ${NUM_PROMPTS} " \
114+ --profile
115+ }
+
+cleanup_server() {
+    echo "Stopping vLLM server..."
     kill -9 $server_pid 2>/dev/null || true
-    exit 1
-fi
-
-# Run the load generation/profiling command
-echo "Starting load generation for profiling..."
-echo "Load gen command: vllm bench serve --dataset-name ${DATASET_NAME} --model ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS}"
-
-vllm bench serve \
-    --dataset-name "${DATASET_NAME}" \
-    --model "${MODEL_NAME}" \
-    --served-model-name "${SERVED_MODEL_NAME}" \
-    --random-input-len "${RANDOM_INPUT_LEN}" \
-    --random-output-len "${RANDOM_OUTPUT_LEN}" \
-    --endpoint "${ENDPOINT}" \
-    --ignore-eos \
-    --host "${HOST}" \
-    --port "${PORT}" \
-    --num-prompts "${NUM_PROMPTS}" \
-    --profile
-
-# Clean up the server
-echo "Stopping vLLM server..."
-kill -9 $server_pid 2>/dev/null || true
-kill_gpu_processes
-
-# Copy any generated profiling results to the profiling-results directory
-if [ -d "${VLLM_TORCH_PROFILER_DIR:-}" ]; then
-    echo "Copying profiling results from ${VLLM_TORCH_PROFILER_DIR} to profiling-results/"
-    cp -r "${VLLM_TORCH_PROFILER_DIR}"/* profiling-results/ 2>/dev/null || echo "No profiling results found in ${VLLM_TORCH_PROFILER_DIR}"
-fi
-
-# Look for any .json or .trace files that might have been generated
-find . -name "*.json" -o -name "*.trace" -o -name "*.chrome_trace" | while read -r file; do
-    echo "Moving profiling artifact: ${file}"
-    cp "${file}" profiling-results/ 2>/dev/null || echo "Failed to copy ${file}"
-done
-
-echo "Profiling artifacts copied to profiling-results/"
-ls -la profiling-results/
+    kill_gpu_processes
+}
+
+main() {
+    # Setup phase
+    setup_environment
+    print_configuration
+    install_dependencies
+    setup_workspace
+
+    # Clean up any existing processes first
+    kill_gpu_processes
+
+    # Main execution phase
+    if start_vllm_server; then
+        run_profiling
+        cleanup_server
+        echo "Profiling completed. Artifacts will be available in ${VLLM_TORCH_PROFILER_DIR:-the default profiler directory}."
+    else
+        echo "Failed to start vLLM server. Exiting."
+        exit 1
+    fi
+}
+
+# Run the main function
+main "$@"