Commit deddaff
refactored code and split common functions
1 parent 7e82e0e commit deddaff

File tree

.github/scripts/common_functions.sh (new)
.github/scripts/run-sglang-performance-benchmarks.sh
.github/scripts/run_vllm_profiling.sh
vllm-profiling/cuda/profiling-tests.json

4 files changed: +145 -165 lines changed

.github/scripts/common_functions.sh

Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+# Common functions shared between performance benchmarking scripts
+# This file contains utility functions used by both the SGLang and vLLM scripts
+
+json2args() {
+    # transforms a JSON string into command-line args; '_' in keys is replaced with '-'
+    # example:
+    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
+    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
+    local json_string=$1
+    local args=$(
+        echo "$json_string" | jq -r '
+            to_entries |
+            map(
+                if .value == "" then "--" + (.key | gsub("_"; "-"))
+                else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
+                end
+            ) |
+            join(" ")
+        '
+    )
+    echo "$args"
+}
+
+json2envs() {
+    # transforms a JSON string into environment variable assignments
+    # example:
+    # input: { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
+    # output: SGLANG_DISABLE_CUDA_GRAPH=1
+    local json_string=$1
+    local args=$(
+        echo "$json_string" | jq -r '
+            to_entries |
+            map((.key) + "=" + (.value | tostring)) |
+            join(" ")
+        '
+    )
+    echo "$args"
+}
+
+wait_for_server() {
+    # wait for the server to start
+    # $1: endpoint URL (e.g., localhost:30000/v1/completions or localhost:8000/v1/models)
+    # $2: timeout in seconds (default: 1200)
+    # returns 1 if the server crashes or the timeout expires
+    local endpoint="${1:-localhost:8000/v1/models}"
+    local timeout="${2:-1200}"
+
+    timeout "$timeout" bash -c "
+        until curl -s $endpoint > /dev/null; do
+            sleep 1
+        done" && return 0 || return 1
+}
+
+kill_gpu_processes() {
+    # Kill GPU processes and wait for GPU memory to clear
+    # $1: port number to kill processes on (default: 8000)
+    local port="${1:-8000}"
+
+    ps -aux
+    lsof -t -i:"$port" | xargs -r kill -9
+    pgrep python3 | xargs -r kill -9
+    pgrep python | xargs -r kill -9
+    pgrep VLLM | xargs -r kill -9
+
+    # wait until GPU memory usage is smaller than 1GB
+    if command -v nvidia-smi; then
+        echo "Waiting for GPU memory to clear..."
+        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
+            sleep 1
+        done
+    elif command -v amd-smi; then
+        while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
+            sleep 1
+        done
+    fi
+}
+
+install_dependencies() {
+    echo "Installing required dependencies..."
+    (which curl) || (apt-get update && apt-get install -y curl)
+    (which lsof) || (apt-get update && apt-get install -y lsof)
+    (which jq) || (apt-get update && apt-get install -y jq)
+    (which wget) || (apt-get update && apt-get install -y wget)
+}
+
+kill_processes_launched_by_current_bash() {
+    # Kill all processes matching a pattern that were launched from the current bash script
+    # $1: process pattern to match
+    current_shell_pid=$$
+    processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
+    if [ -n "$processes" ]; then
+        echo "Killing the following processes matching '$1':"
+        echo "$processes"
+        echo "$processes" | xargs kill -9
+    else
+        echo "No processes found matching '$1'."
+    fi
+}
+
+check_gpus() {
+    # check the number of GPUs and the GPU type
+    if command -v nvidia-smi; then
+        declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
+    elif command -v amd-smi; then
+        declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
+    fi
+
+    if [[ $gpu_count -gt 0 ]]; then
+        echo "GPU found."
+    else
+        echo "Need at least 1 GPU to run benchmarking."
+        exit 1
+    fi
+    if command -v nvidia-smi; then
+        declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
+    elif command -v amd-smi; then
+        declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
+    fi
+    echo "GPU type is $gpu_type"
+}
+
+check_hf_token() {
+    # check that HF_TOKEN is set and looks valid
+    if [[ -z "$HF_TOKEN" ]]; then
+        echo "Error: HF_TOKEN is not set."
+        exit 1
+    elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
+        echo "Error: HF_TOKEN does not start with 'hf_'."
+        exit 1
+    else
+        echo "HF_TOKEN is set and valid."
+    fi
+}
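
A minimal usage sketch of these helpers (illustrative only — the parameter JSON and model name are assumptions, not taken from this commit):

#!/bin/bash
# Source the shared helpers relative to the calling script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/common_functions.sh"

check_gpus            # sets gpu_count/gpu_type, exits if no GPU is found
install_dependencies  # installs curl, lsof, jq, wget if missing

# Turn JSON parameter blocks into CLI flags and env assignments
server_args=$(json2args '{ "model": "facebook/opt-125m", "tensor_parallel_size": 1 }')
server_envs=$(json2envs '{ "VLLM_USE_V1": 1 }')
echo "$server_args"   # --model facebook/opt-125m --tensor-parallel-size 1
echo "$server_envs"   # VLLM_USE_V1=1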

.github/scripts/run-sglang-performance-benchmarks.sh

Lines changed: 7 additions & 113 deletions
@@ -9,31 +9,13 @@
 set -x
 set -o pipefail

+# Source common functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/common_functions.sh"
+
 # The helper functions and their implementations are referred from the implementation
 # of the run-performance-benchmarks.sh script in the official vllm repo
 # Path:- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
-check_gpus() {
-    if command -v nvidia-smi; then
-        # check the number of GPUs and GPU type.
-        declare -g gpu_count=$(nvidia-smi --list-gpus | wc -l)
-    elif command -v amd-smi; then
-        declare -g gpu_count=$(amd-smi list | grep 'GPU' | wc -l)
-    fi
-
-    if [[ $gpu_count -gt 0 ]]; then
-        echo "GPU found."
-    else
-        echo "Need at least 1 GPU to run benchmarking."
-        exit 1
-    fi
-    if command -v nvidia-smi; then
-        declare -g gpu_type=$(nvidia-smi --query-gpu=name --format=csv,noheader | awk '{print $2}')
-    elif command -v amd-smi; then
-        declare -g gpu_type=$(amd-smi static -g 0 -a | grep 'MARKET_NAME' | awk '{print $2}')
-    fi
-    echo "GPU type is $gpu_type"
-}
-
 check_cpus() {
     # check the number of CPUs and NUMA Node and GPU type.
     declare -g numa_count=$(lscpu | grep "NUMA node(s):" | awk '{print $3}')
@@ -48,18 +30,6 @@ check_cpus() {
     echo "GPU type is $gpu_type"
 }

-check_hf_token() {
-    # check if HF_TOKEN is available and valid
-    if [[ -z "$HF_TOKEN" ]]; then
-        echo "Error: HF_TOKEN is not set."
-        exit 1
-    elif [[ ! "$HF_TOKEN" =~ ^hf_ ]]; then
-        echo "Error: HF_TOKEN does not start with 'hf_'."
-        exit 1
-    else
-        echo "HF_TOKEN is set and valid."
-    fi
-}

 ensure_sharegpt_downloaded() {
     local FILE=ShareGPT_V3_unfiltered_cleaned_split.json
@@ -70,78 +40,6 @@ ensure_sharegpt_downloaded() {
     fi
 }

-json2args() {
-    # transforms the JSON string to command line args, and '_' is replaced to '-'
-    # example:
-    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
-    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map("--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
-
-json2envs() {
-    # transforms the JSON string to environment variables.
-    # example:
-    # input: { "SGLANG_DISABLE_CUDA_GRAPH": 1 }
-    # output: SGLANG_DISABLE_CUDA_GRAPH=1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map((.key ) + "=" + (.value | tostring)) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
-
-wait_for_server() {
-    # wait for sglang server to start
-    # return 1 if sglang server crashes
-    timeout 1200 bash -c '
-        until curl -s localhost:30000/v1/completions > /dev/null; do
-            sleep 1
-        done' && return 0 || return 1
-}
-
-kill_processes_launched_by_current_bash() {
-    # Kill all python processes launched from current bash script
-    current_shell_pid=$$
-    processes=$(ps -eo pid,ppid,command | awk -v ppid="$current_shell_pid" -v proc="$1" '$2 == ppid && $3 ~ proc {print $1}')
-    if [ -n "$processes" ]; then
-        echo "Killing the following processes matching '$1':"
-        echo "$processes"
-        echo "$processes" | xargs kill -9
-    else
-        echo "No processes found matching '$1'."
-    fi
-}
-
-kill_gpu_processes() {
-    ps -aux
-    lsof -t -i:30000 | xargs -r kill -9
-    pgrep python3 | xargs -r kill -9
-    pgrep python | xargs -r kill -9
-    pgrep VLLM | xargs -r kill -9
-
-    # wait until GPU memory usage smaller than 1GB
-    if command -v nvidia-smi; then
-        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
-            sleep 1
-        done
-    elif command -v amd-smi; then
-        while [ "$(amd-smi metric -g 0 | grep 'USED_VRAM' | awk '{print $2}')" -ge 1000 ]; do
-            sleep 1
-        done
-    fi
-}

 run_serving_tests() {
     # run serving tests using `sglang.bench_serving` command
@@ -211,7 +109,7 @@ run_serving_tests() {
         server_pid=$!

         # wait until the server is alive
-        if wait_for_server; then
+        if wait_for_server "localhost:30000/v1/completions"; then
             echo ""
             echo "SGLang server is up and running."
         else
@@ -285,18 +183,14 @@ run_serving_tests() {

         # clean up
         kill -9 $server_pid
-        kill_gpu_processes
+        kill_gpu_processes 30000
     done
 }

 main() {
     check_gpus
     check_hf_token
-
-    # dependencies
-    (which wget && which curl) || (apt-get update && apt-get install -y wget curl)
-    (which jq) || (apt-get update && apt-get -y install jq)
-    (which lsof) || (apt-get update && apt-get install -y lsof)
+    install_dependencies

     # get the current IP address, required by SGLang bench commands
     export SGLANG_HOST_IP=$(hostname -I | awk '{print $1}')
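
With the shared helpers now parameterized, this script passes its SGLang endpoint and port explicitly instead of relying on hard-coded values. A sketch of the resulting call pattern (the launch command and model are illustrative, not part of this diff):

python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 &
server_pid=$!

# poll the completions endpoint for up to 1200s before benchmarking
if wait_for_server "localhost:30000/v1/completions" 1200; then
    echo "SGLang server is up and running."
fi

# ... run sglang.bench_serving against the server ...

kill -9 $server_pid
kill_gpu_processes 30000   # frees port 30000, then waits for GPU memory to drop below ~1GB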

.github/scripts/run_vllm_profiling.sh

Lines changed: 3 additions & 51 deletions
@@ -1,39 +1,16 @@
 #!/bin/bash
 set -eux

-json2args() {
-    # transforms the JSON string to command line args, and '_' is replaced to '-'
-    # example:
-    # input: { "model": "meta-llama/Llama-2-7b-chat-hf", "tensor_parallel_size": 1 }
-    # output: --model meta-llama/Llama-2-7b-chat-hf --tensor-parallel-size 1
-    local json_string=$1
-    local args=$(
-        echo "$json_string" | jq -r '
-            to_entries |
-            map(
-                if .value == "" then "--" + (.key | gsub("_"; "-"))
-                else "--" + (.key | gsub("_"; "-")) + " " + (.value | tostring)
-                end
-            ) |
-            join(" ")
-        '
-    )
-    echo "$args"
-}
+# Source common functions
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "${SCRIPT_DIR}/common_functions.sh"

 print_configuration() {
     echo 'Running vLLM profiling with the following configuration:'
     echo "  Profiler Dir: ${VLLM_TORCH_PROFILER_DIR:-not set}"
     echo "  VLLM_USE_V1: ${VLLM_USE_V1:-1}"
 }

-install_dependencies() {
-    echo "Installing required dependencies..."
-    (which curl) || (apt-get update && apt-get install -y curl)
-    (which lsof) || (apt-get update && apt-get install -y lsof)
-    (which jq) || (apt-get update && apt-get -y install jq)
-}
-
 setup_workspace() {
     WORKSPACE_DIR="/tmp/workspace"
     cd "${WORKSPACE_DIR}"
@@ -43,31 +20,6 @@ setup_workspace() {
     chmod 755 "${VLLM_TORCH_PROFILER_DIR}"
 }

-wait_for_server() {
-    # Wait for vLLM server to start
-    # Return 1 if vLLM server crashes
-    local host_port="${1:-localhost:8000}"
-    timeout 1200 bash -c "
-        until curl -s ${host_port}/v1/models > /dev/null; do
-            sleep 1
-        done" && return 0 || return 1
-}
-
-kill_gpu_processes() {
-    ps -aux
-    lsof -t -i:8000 | xargs -r kill -9
-    pgrep python3 | xargs -r kill -9
-    pgrep VLLM | xargs -r kill -9
-
-    # Wait until GPU memory usage decreases
-    if command -v nvidia-smi; then
-        echo "Waiting for GPU memory to clear..."
-        while [ "$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | head -n 1)" -ge 1000 ]; do
-            sleep 1
-        done
-    fi
-}
-
 start_vllm_server() {
     local server_args="$1"
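
Since the shared wait_for_server defaults to localhost:8000/v1/models and kill_gpu_processes defaults to port 8000, the profiling script can call both without arguments. A sketch under those defaults (the serve command is illustrative, not part of this diff):

vllm serve facebook/opt-125m --port 8000 &
server_pid=$!

wait_for_server          # defaults: localhost:8000/v1/models, 1200s timeout
# ... profiling workload runs here ...
kill -9 $server_pid
kill_gpu_processes       # defaults to port 8000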

vllm-profiling/cuda/profiling-tests.json

Lines changed: 0 additions & 1 deletion
@@ -11,7 +11,6 @@
     },
     "client_parameters": {
         "model": "facebook/opt-125m",
-        "served_model_name": "facebook/opt-125m",
         "dataset_name": "random",
         "random_input_len": 750,
         "random_output_len": 75,

0 commit comments