
# TODO: Refactor the workflows to extract the common parts into a GHA reusable module
name: vLLM Profiling

on:
  schedule:
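    # Weekly run: 00:00 UTC every Sunday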
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, defaults to the latest commit in the branch that has not yet been benchmarked)
        required: false
        type: string
      # TODO: add support for profiling on a specific model and runner
  pull_request:
    paths:
      - .github/workflows/vllm-profiling.yml
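
# Deduplicate runs: the group keys on the PR number (or commit SHA) plus the
# trigger type, and a newer run for the same group cancels the one in flight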
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

  profiling:
    name: Run vLLM profiling
    needs: set-parameters
    strategy:
      fail-fast: false
      matrix:
        include:
          - runs-on: linux.aws.a100
            device-name: cuda
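    # The matrix currently has a single CUDA A100 entry; see the TODO in the
    # workflow_dispatch inputs about supporting additional models and runners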
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-profiling/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
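          # Persist the detected accelerator so later steps (and the profiling
          # container) can branch on it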
echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
- name: Set GPU name and type
shell: bash
run: |
set -eux
if [[ "${DEVICE_NAME}" == "cuda" ]]; then
DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
fi
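          # DEVICE_TYPE (e.g. the GPU marketing name) is forwarded into the
          # container and used to name the uploaded artifact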
echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
- name: Install dependencies
shell: bash
run: |
set -eux
if [[ "${DEVICE_NAME}" == "rocm" ]]; then
pip install -r .github/scripts/requirements.txt \
--extra-index-url https://download.pytorch.org/whl/rocm6.3
else
pip install -r .github/scripts/requirements.txt \
--extra-index-url https://download.pytorch.org/whl/cu128
fi
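
      # Pick the registry for the prebuilt vLLM CI image: post-merge images for
      # main, per-PR test images for everything else, with separate handling for
      # ROCm and CPU builds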
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
        run: |
          set -eux

          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi

          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV
      - name: Check for last commit
        working-directory: vllm-profiling/vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            for i in {0..99}
            do
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"

              # A Docker image is available for this commit, stop searching
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi

          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
          echo "### Run profiling on [${HEAD_SHA}](https://github.com/vllm-project/vllm/commit/${HEAD_SHA})" >> "${GITHUB_STEP_SUMMARY}"
      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
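
      # sccache's default server port is 4226; offsetting it by the runner UID is
      # presumably meant to keep concurrent jobs on a shared host from colliding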
      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
        run: |
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"

      - name: Run vLLM profiling
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
          VLLM_USE_MODELSCOPE: false
          VLLM_TORCH_PROFILER_DIR: /tmp/workspace/vllm-profiling/profiling-results
          CUDA_VISIBLE_DEVICES: 0
          VLLM_USE_V1: 1
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cpu" ]]; then
            ON_CPU=1
          else
            ON_CPU=0
          fi
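          # Start the prebuilt CI image detached with the workspace bind-mounted at
          # /tmp/workspace, so traces written to VLLM_TORCH_PROFILER_DIR land in
          # vllm-profiling/profiling-results on the runner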
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e VLLM_USE_MODELSCOPE \
            -e VLLM_TORCH_PROFILER_DIR \
            -e CUDA_VISIBLE_DEVICES \
            -e VLLM_USE_V1 \
            -e ON_CPU="${ON_CPU}" \
            -e S3_HEAD_SHA="${HEAD_SHA}" \
            -e S3_GITHUB_RUN_ID="${GITHUB_RUN_ID}" \
            -e S3_GITHUB_JOB="${GITHUB_JOB}" \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )

          docker exec -t "${container_name}" bash -c "cd vllm-profiling && bash ../.github/scripts/run_vllm_profiling.sh"
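
      # Results are published twice: to S3 under <upload date>/<repository> with a
      # 180-day retention, and as a regular GitHub artifact named after the device type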
      - name: Prepare S3 upload metadata
        id: prepare_s3_upload
        env:
          REPOSITORY: vllm-project/vllm
        run: |
          set -eux

          UPLOAD_DATE=$(date -u +"%Y-%m-%d")
          echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}"
          echo "s3-prefix=${UPLOAD_DATE}/${REPOSITORY}" >> "${GITHUB_OUTPUT}"

      - name: Upload profiling results to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }}
          retention-days: 180
          path: vllm-profiling/profiling-results
          if-no-files-found: warn

      - uses: actions/upload-artifact@v4
        with:
          name: profiling-results--${{ env.DEVICE_TYPE }}
          path: vllm-profiling/profiling-results