@@ -4,42 +4,45 @@ set -eux
 
 # Script to run vLLM profiling with configurable parameters via environment variables
 
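+# Example invocation (script name is illustrative; every variable below is
+# optional and falls back to the defaults set in setup_environment):
+#   MODEL_NAME=facebook/opt-125m NUM_PROMPTS=200 bash run_vllm_profiling.sh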
-echo 'Running vLLM profiling with the following configuration:'
-echo "Model: ${MODEL_NAME:-facebook/opt-125m}"
-echo "Served Model: ${SERVED_MODEL_NAME:-${MODEL_NAME:-facebook/opt-125m}}"
-echo "Dataset: ${DATASET_NAME:-random}"
-echo "Input Length: ${RANDOM_INPUT_LEN:-750}"
-echo "Output Length: ${RANDOM_OUTPUT_LEN:-75}"
-echo "Endpoint: ${ENDPOINT:-/v1/completions}"
-echo "Host: ${HOST:-localhost}"
-echo "Port: ${PORT:-8000}"
-echo "Num Prompts: ${NUM_PROMPTS:-100}"
-echo "VLLM_USE_V1: ${VLLM_USE_V1:-1}"
-
-# Install required dependencies
-echo "Installing required dependencies..."
-(which curl) || (apt-get update && apt-get install -y curl)
-(which lsof) || (apt-get update && apt-get install -y lsof)
-
-# Ensure we're in the right directory (mounted workspace)
-cd /tmp/workspace/vllm
-
-# Create profiling results directory
-mkdir -p profiling-results
-
-# Set default values for any missing environment variables
-export VLLM_USE_V1=${VLLM_USE_V1:-1}
-MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
-SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
-DATASET_NAME=${DATASET_NAME:-random}
-RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
-RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
-ENDPOINT=${ENDPOINT:-/v1/completions}
-HOST=${HOST:-localhost}
-PORT=${PORT:-8000}
-NUM_PROMPTS=${NUM_PROMPTS:-100}
-
-# Helper functions
42- # Helper functions
+# Global variables - set defaults for environment variables
+setup_environment() {
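+    # ${VAR:-default} keeps any value the caller already exported and only
+    # falls back to the default when the variable is unset or empty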
+    export VLLM_USE_V1=${VLLM_USE_V1:-1}
+    MODEL_NAME=${MODEL_NAME:-facebook/opt-125m}
+    SERVED_MODEL_NAME=${SERVED_MODEL_NAME:-${MODEL_NAME}}
+    DATASET_NAME=${DATASET_NAME:-random}
+    RANDOM_INPUT_LEN=${RANDOM_INPUT_LEN:-750}
+    RANDOM_OUTPUT_LEN=${RANDOM_OUTPUT_LEN:-75}
+    ENDPOINT=${ENDPOINT:-/v1/completions}
+    HOST=${HOST:-localhost}
+    PORT=${PORT:-8000}
+    NUM_PROMPTS=${NUM_PROMPTS:-100}
+}
+
+print_configuration() {
+    echo 'Running vLLM profiling with the following configuration:'
+    echo "Model: ${MODEL_NAME}"
+    echo "Served Model: ${SERVED_MODEL_NAME}"
+    echo "Dataset: ${DATASET_NAME}"
+    echo "Input Length: ${RANDOM_INPUT_LEN}"
+    echo "Output Length: ${RANDOM_OUTPUT_LEN}"
+    echo "Endpoint: ${ENDPOINT}"
+    echo "Host: ${HOST}"
+    echo "Port: ${PORT}"
+    echo "Num Prompts: ${NUM_PROMPTS}"
+    echo "VLLM_USE_V1: ${VLLM_USE_V1}"
+}
+
+install_dependencies() {
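+    # Assumes a Debian/Ubuntu-based image: 'which' probes for each tool and
+    # apt-get installs it only when it is missing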
36+ echo " Installing required dependencies..."
37+ (which curl) || (apt-get update && apt-get install -y curl)
38+ (which lsof) || (apt-get update && apt-get install -y lsof)
39+ }
40+
+setup_workspace() {
+    # Ensure we're in the right directory (mounted workspace)
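+    # With 'set -eux' in effect at the top of the script, a missing mount
+    # makes this cd fail and aborts the run immediately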
+    cd /tmp/workspace/vllm
+}
+
 wait_for_server() {
     # Wait for vLLM server to start
     # Return 1 if vLLM server crashes
@@ -64,66 +67,79 @@ kill_gpu_processes() {
     fi
 }
 
-# Clean up any existing processes first
-kill_gpu_processes
-
-# Start vLLM server in the background
-echo "Starting vLLM server..."
-
-VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
-    --model "${MODEL_NAME}" \
-    --swap-space 16 \
-    --disable-log-requests \
-    --host :: \
-    --port "${PORT}" \
-    --dtype float16 &
-
-server_pid=$!
-echo "vLLM server started with PID: ${server_pid}"
-
-# Wait for server to be ready
-echo "Waiting for vLLM server to be ready..."
-if wait_for_server; then
-    echo "vLLM server is up and running!"
-else
-    echo "vLLM server failed to start within the timeout period."
+start_vllm_server() {
+    echo "Starting vLLM server..."
+
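+    # '--host ::' binds the IPv6 wildcard address, which on typical dual-stack
+    # Linux hosts also accepts IPv4 connections; the server runs in the
+    # background so the script can poll it for readiness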
+    VLLM_USE_V1=${VLLM_USE_V1} python3 -m vllm.entrypoints.openai.api_server \
+        --model "${MODEL_NAME}" \
+        --swap-space 16 \
+        --disable-log-requests \
+        --host :: \
+        --port "${PORT}" \
+        --dtype float16 &
+
+    server_pid=$!
+    echo "vLLM server started with PID: ${server_pid}"
+
+    # Wait for server to be ready
+    echo "Waiting for vLLM server to be ready..."
+    if wait_for_server; then
+        echo "vLLM server is up and running!"
+        return 0
+    else
+        echo "vLLM server failed to start within the timeout period."
+        kill -9 $server_pid 2>/dev/null || true
+        return 1
+    fi
+}
+
+run_profiling() {
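+    # '--profile' makes the benchmark ask the server to capture a torch
+    # profiler trace; this relies on the server having been launched with
+    # VLLM_TORCH_PROFILER_DIR set, otherwise no trace can be written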
97+ echo " Starting load generation for profiling..."
98+
99+ local bench_command=" vllm bench serve --dataset-name ${DATASET_NAME} --model ${MODEL_NAME} --served-model-name ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS} --profile"
100+
101+ echo " Load gen command: ${bench_command} "
102+
103+ vllm bench serve \
104+ --dataset-name " ${DATASET_NAME} " \
105+ --model " ${MODEL_NAME} " \
106+ --served-model-name " ${SERVED_MODEL_NAME} " \
107+ --random-input-len " ${RANDOM_INPUT_LEN} " \
108+ --random-output-len " ${RANDOM_OUTPUT_LEN} " \
109+ --endpoint " ${ENDPOINT} " \
110+ --ignore-eos \
111+ --host " ${HOST} " \
112+ --port " ${PORT} " \
113+ --num-prompts " ${NUM_PROMPTS} " \
114+ --profile
115+ }
+
+cleanup_server() {
+    echo "Stopping vLLM server..."
     kill -9 $server_pid 2>/dev/null || true
-    exit 1
-fi
-
-# Run the load generation/profiling command
-echo "Starting load generation for profiling..."
-echo "Load gen command: vllm bench serve --dataset-name ${DATASET_NAME} --model ${SERVED_MODEL_NAME} --random-input-len ${RANDOM_INPUT_LEN} --random-output-len ${RANDOM_OUTPUT_LEN} --endpoint ${ENDPOINT} --ignore-eos --host ${HOST} --port ${PORT} --num-prompts ${NUM_PROMPTS}"
-
-vllm bench serve \
-    --dataset-name "${DATASET_NAME}" \
-    --model "${MODEL_NAME}" \
-    --served-model-name "${SERVED_MODEL_NAME}" \
-    --random-input-len "${RANDOM_INPUT_LEN}" \
-    --random-output-len "${RANDOM_OUTPUT_LEN}" \
-    --endpoint "${ENDPOINT}" \
-    --ignore-eos \
-    --host "${HOST}" \
-    --port "${PORT}" \
-    --num-prompts "${NUM_PROMPTS}" \
-    --profile
-
-# Clean up the server
-echo "Stopping vLLM server..."
-kill -9 $server_pid 2>/dev/null || true
-kill_gpu_processes
-
-# Copy any generated profiling results to the profiling-results directory
-if [ -d "${VLLM_TORCH_PROFILER_DIR:-}" ]; then
-    echo "Copying profiling results from ${VLLM_TORCH_PROFILER_DIR} to profiling-results/"
-    cp -r "${VLLM_TORCH_PROFILER_DIR}"/* profiling-results/ 2>/dev/null || echo "No profiling results found in ${VLLM_TORCH_PROFILER_DIR}"
-fi
-
-# Look for any .json or .trace files that might have been generated
-find . -name "*.json" -o -name "*.trace" -o -name "*.chrome_trace" | while read -r file; do
-    echo "Moving profiling artifact: ${file}"
-    cp "${file}" profiling-results/ 2>/dev/null || echo "Failed to copy ${file}"
-done
-
-echo "Profiling artifacts copied to profiling-results/"
-ls -la profiling-results/
+    kill_gpu_processes
+}
+
+main() {
+    # Setup phase
+    setup_environment
+    print_configuration
+    install_dependencies
+    setup_workspace
+
+    # Clean up any existing processes first
+    kill_gpu_processes
+
+    # Main execution phase
+    if start_vllm_server; then
+        run_profiling
+        cleanup_server
+        echo "Profiling completed. Artifacts will be available in ${VLLM_TORCH_PROFILER_DIR:-the default profiler directory}."
+    else
+        echo "Failed to start vLLM server. Exiting."
+        exit 1
+    fi
+}
+
+# Run the main function
+main "$@"